/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "brw_vec4.h"
extern "C" {
#include "main/macros.h"
#include "program/prog_parameter.h"
}

namespace brw {

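/* Construct a src_reg that reads back what a dst_reg wrote: the
 * destination's writemask is turned into a read swizzle, with the last
 * written channel replicated into the unused slots (e.g. a writemask of
 * .xz becomes swizzle XZZZ).
 */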
src_reg::src_reg(dst_reg reg)
{
   init();

   this->file = reg.file;
   this->reg = reg.reg;
   this->reg_offset = reg.reg_offset;
   this->type = reg.type;
   this->reladdr = reg.reladdr;
   this->fixed_hw_reg = reg.fixed_hw_reg;

   int swizzles[4];
   int next_chan = 0;
   int last = 0;

   for (int i = 0; i < 4; i++) {
      if (!(reg.writemask & (1 << i)))
         continue;

      swizzles[next_chan++] = last = i;
   }

   for (; next_chan < 4; next_chan++) {
      swizzles[next_chan] = last;
   }

   this->swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
                                swizzles[2], swizzles[3]);
}

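/* Construct a dst_reg from a src_reg: the swizzle is dropped and the
 * destination writes all four channels.
 */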
dst_reg::dst_reg(src_reg reg)
{
   init();

   this->file = reg.file;
   this->reg = reg.reg;
   this->reg_offset = reg.reg_offset;
   this->type = reg.type;
   this->writemask = WRITEMASK_XYZW;
   this->reladdr = reg.reladdr;
   this->fixed_hw_reg = reg.fixed_hw_reg;
}

vec4_instruction::vec4_instruction(vec4_visitor *v,
                                   enum opcode opcode, dst_reg dst,
                                   src_reg src0, src_reg src1, src_reg src2)
{
   this->opcode = opcode;
   this->dst = dst;
   this->src[0] = src0;
   this->src[1] = src1;
   this->src[2] = src2;
   this->ir = v->base_ir;
   this->annotation = v->current_annotation;
}

vec4_instruction *
vec4_visitor::emit(vec4_instruction *inst)
{
   this->instructions.push_tail(inst);

   return inst;
}

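/* Insert new_inst into the instruction stream ahead of inst, inheriting
 * inst's source IR pointer and debug annotation.
 */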
vec4_instruction *
vec4_visitor::emit_before(vec4_instruction *inst, vec4_instruction *new_inst)
{
   new_inst->ir = inst->ir;
   new_inst->annotation = inst->annotation;

   inst->insert_before(new_inst);

   return inst;
}

vec4_instruction *
vec4_visitor::emit(enum opcode opcode, dst_reg dst,
                   src_reg src0, src_reg src1, src_reg src2)
{
   return emit(new(mem_ctx) vec4_instruction(this, opcode, dst,
                                             src0, src1, src2));
}

vec4_instruction *
vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1)
{
   return emit(new(mem_ctx) vec4_instruction(this, opcode, dst, src0, src1));
}

vec4_instruction *
vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0)
{
   return emit(new(mem_ctx) vec4_instruction(this, opcode, dst, src0));
}

vec4_instruction *
vec4_visitor::emit(enum opcode opcode)
{
   return emit(new(mem_ctx) vec4_instruction(this, opcode, dst_reg()));
}

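/* Convenience builders, named after their hardware opcodes, that
 * construct an instruction without emitting it into the stream.
 */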
#define ALU1(op)                                                        \
   vec4_instruction *                                                   \
   vec4_visitor::op(dst_reg dst, src_reg src0)                          \
   {                                                                    \
      return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst,  \
                                           src0);                       \
   }

#define ALU2(op)                                                        \
   vec4_instruction *                                                   \
   vec4_visitor::op(dst_reg dst, src_reg src0, src_reg src1)            \
   {                                                                    \
      return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst,  \
                                           src0, src1);                 \
   }

ALU1(NOT)
ALU1(MOV)
ALU1(FRC)
ALU1(RNDD)
ALU1(RNDE)
ALU1(RNDZ)
ALU2(ADD)
ALU2(MUL)
ALU2(MACH)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(DP3)
ALU2(DP4)

/** Gen4 predicated IF. */
vec4_instruction *
vec4_visitor::IF(uint32_t predicate)
{
   vec4_instruction *inst;

   inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_IF);
   inst->predicate = predicate;

   return inst;
}

/** Gen6+ IF with embedded comparison. */
vec4_instruction *
vec4_visitor::IF(src_reg src0, src_reg src1, uint32_t condition)
{
   assert(intel->gen >= 6);

   vec4_instruction *inst;

   resolve_ud_negate(&src0);
   resolve_ud_negate(&src1);

   inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_IF, dst_null_d(),
                                        src0, src1);
   inst->conditional_mod = condition;

   return inst;
}

/**
 * CMP: Sets the low bit of the destination channels with the result
 * of the comparison, while the upper bits are undefined, and updates
 * the flag register with the packed 16 bits of the result.
 */
vec4_instruction *
vec4_visitor::CMP(dst_reg dst, src_reg src0, src_reg src1, uint32_t condition)
{
   vec4_instruction *inst;

   /* original gen4 does type conversion to the destination type
    * before comparison, producing garbage results for floating
    * point comparisons.
    */
   if (intel->gen == 4) {
      dst.type = src0.type;
      if (dst.file == HW_REG)
         dst.fixed_hw_reg.type = dst.type;
   }

   resolve_ud_negate(&src0);
   resolve_ud_negate(&src1);

   inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_CMP, dst, src0, src1);
   inst->conditional_mod = condition;

   return inst;
}

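/* Scratch access goes through a message; base_mrf picks the first
 * message register used to build it.  A read needs one register
 * (mlen 1), while a write needs a second one for the data payload
 * (mlen 2).
 */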
vec4_instruction *
vec4_visitor::SCRATCH_READ(dst_reg dst, src_reg index)
{
   vec4_instruction *inst;

   inst = new(mem_ctx) vec4_instruction(this, VS_OPCODE_SCRATCH_READ,
                                        dst, index);
   inst->base_mrf = 14;
   inst->mlen = 1;

   return inst;
}

vec4_instruction *
vec4_visitor::SCRATCH_WRITE(dst_reg dst, src_reg src, src_reg index)
{
   vec4_instruction *inst;

   inst = new(mem_ctx) vec4_instruction(this, VS_OPCODE_SCRATCH_WRITE,
                                        dst, src, index);
   inst->base_mrf = 13;
   inst->mlen = 2;

   return inst;
}

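/* Emit DP2, DP3 or DP4 according to the number of components (2 to 4)
 * being dotted together.
 */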
void
vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements)
{
   static enum opcode dot_opcodes[] = {
      BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4
   };

   emit(dot_opcodes[elements - 2], dst, src0, src1);
}

void
vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src)
{
   /* The gen6 math instruction ignores the source modifiers --
    * swizzle, abs, negate, and at least some parts of the register
    * region description.
    *
    * While it would seem that this MOV could be avoided at this point
    * in the case that the swizzle is matched up with the destination
    * writemask, note that uniform packing and register allocation
    * could rearrange our swizzle, so let's leave this matter up to
    * copy propagation later.
    */
   src_reg temp_src = src_reg(this, glsl_type::vec4_type);
   emit(MOV(dst_reg(temp_src), src));

   if (dst.writemask != WRITEMASK_XYZW) {
      /* The gen6 math instruction must be align1, so we can't do
       * writemasks.
       */
      dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);

      emit(opcode, temp_dst, temp_src);

      emit(MOV(dst, src_reg(temp_dst)));
   } else {
      emit(opcode, dst, temp_src);
   }
}

void
vec4_visitor::emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src)
{
   vec4_instruction *inst = emit(opcode, dst, src);
   inst->base_mrf = 1;
   inst->mlen = 1;
}

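/* Single-operand math: routed through the MRF-based math message on
 * gen4/5 (hence the base_mrf/mlen setup above) and emitted as a regular
 * ALU-style instruction on gen6+.
 */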
void
vec4_visitor::emit_math(opcode opcode, dst_reg dst, src_reg src)
{
   switch (opcode) {
   case SHADER_OPCODE_RCP:
   case SHADER_OPCODE_RSQ:
   case SHADER_OPCODE_SQRT:
   case SHADER_OPCODE_EXP2:
   case SHADER_OPCODE_LOG2:
   case SHADER_OPCODE_SIN:
   case SHADER_OPCODE_COS:
      break;
   default:
      assert(!"not reached: bad math opcode");
      return;
   }

   if (intel->gen >= 6) {
      return emit_math1_gen6(opcode, dst, src);
   } else {
      return emit_math1_gen4(opcode, dst, src);
   }
}

void
vec4_visitor::emit_math2_gen6(enum opcode opcode,
                              dst_reg dst, src_reg src0, src_reg src1)
{
   src_reg expanded;

   /* The gen6 math instruction ignores the source modifiers --
    * swizzle, abs, negate, and at least some parts of the register
    * region description.  Move the sources to temporaries to make it
    * generally work.
    */

   expanded = src_reg(this, glsl_type::vec4_type);
   expanded.type = src0.type;
   emit(MOV(dst_reg(expanded), src0));
   src0 = expanded;

   expanded = src_reg(this, glsl_type::vec4_type);
   expanded.type = src1.type;
   emit(MOV(dst_reg(expanded), src1));
   src1 = expanded;

   if (dst.writemask != WRITEMASK_XYZW) {
      /* The gen6 math instruction must be align1, so we can't do
       * writemasks.
       */
      dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
      temp_dst.type = dst.type;

      emit(opcode, temp_dst, src0, src1);

      emit(MOV(dst, src_reg(temp_dst)));
   } else {
      emit(opcode, dst, src0, src1);
   }
}

void
vec4_visitor::emit_math2_gen4(enum opcode opcode,
                              dst_reg dst, src_reg src0, src_reg src1)
{
   vec4_instruction *inst = emit(opcode, dst, src0, src1);
   inst->base_mrf = 1;
   inst->mlen = 2;
}

void
vec4_visitor::emit_math(enum opcode opcode,
                        dst_reg dst, src_reg src0, src_reg src1)
{
   switch (opcode) {
   case SHADER_OPCODE_POW:
   case SHADER_OPCODE_INT_QUOTIENT:
   case SHADER_OPCODE_INT_REMAINDER:
      break;
   default:
      assert(!"not reached: unsupported binary math opcode");
      return;
   }

   if (intel->gen >= 6) {
      return emit_math2_gen6(opcode, dst, src0, src1);
   } else {
      return emit_math2_gen4(opcode, dst, src0, src1);
   }
}

void
vec4_visitor::visit_instructions(const exec_list *list)
{
   foreach_list(node, list) {
      ir_instruction *ir = (ir_instruction *)node;

      base_ir = ir;
      ir->accept(this);
   }
}

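/* Returns the number of vec4 registers a value of the given GLSL type
 * occupies in this backend.
 */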
static int
type_size(const struct glsl_type *type)
{
   unsigned int i;
   int size;

   switch (type->base_type) {
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_BOOL:
      if (type->is_matrix()) {
         return type->matrix_columns;
      } else {
         /* Regardless of size of vector, it gets a vec4.  This is bad
          * packing for things like floats, but otherwise arrays become a
          * mess.  Hopefully a later pass over the code can pack scalars
          * down if appropriate.
          */
         return 1;
      }
   case GLSL_TYPE_ARRAY:
      assert(type->length > 0);
      return type_size(type->fields.array) * type->length;
   case GLSL_TYPE_STRUCT:
      size = 0;
      for (i = 0; i < type->length; i++) {
         size += type_size(type->fields.structure[i].type);
      }
      return size;
   case GLSL_TYPE_SAMPLER:
      /* Samplers take up one slot in UNIFORMS[], but they're baked in
       * at link time.
       */
      return 1;
   default:
      assert(0);
      return 0;
   }
}

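/* Allocate a virtual GRF of the given size in vec4 registers and return
 * its index, growing the bookkeeping arrays (by doubling) as needed.
 */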
int
vec4_visitor::virtual_grf_alloc(int size)
{
   if (virtual_grf_array_size <= virtual_grf_count) {
      if (virtual_grf_array_size == 0)
         virtual_grf_array_size = 16;
      else
         virtual_grf_array_size *= 2;
      virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int,
                                   virtual_grf_array_size);
      virtual_grf_reg_map = reralloc(mem_ctx, virtual_grf_reg_map, int,
                                     virtual_grf_array_size);
   }
   virtual_grf_reg_map[virtual_grf_count] = virtual_grf_reg_count;
   virtual_grf_reg_count += size;
   virtual_grf_sizes[virtual_grf_count] = size;
   return virtual_grf_count++;
}

src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
{
   init();

   this->file = GRF;
   this->reg = v->virtual_grf_alloc(type_size(type));

   if (type->is_array() || type->is_record()) {
      this->swizzle = BRW_SWIZZLE_NOOP;
   } else {
      this->swizzle = swizzle_for_size(type->vector_elements);
   }

   this->type = brw_type_for_base_type(type);
}

dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
{
   init();

   this->file = GRF;
   this->reg = v->virtual_grf_alloc(type_size(type));

   if (type->is_array() || type->is_record()) {
      this->writemask = WRITEMASK_XYZW;
   } else {
      this->writemask = (1 << type->vector_elements) - 1;
   }

   this->type = brw_type_for_base_type(type);
}

/* Our support for uniforms is piggy-backed on the struct
 * gl_vertex_program, because that's where the values actually
 * get stored, rather than in some global gl_shader_program uniform
 * store.
 */
int
vec4_visitor::setup_uniform_values(int loc, const glsl_type *type)
{
   unsigned int offset = 0;
   float *values = &this->vp->Base.Parameters->ParameterValues[loc][0].f;

   if (type->is_matrix()) {
      const glsl_type *column = type->column_type();

      for (unsigned int i = 0; i < type->matrix_columns; i++) {
         offset += setup_uniform_values(loc + offset, column);
      }

      return offset;
   }

   switch (type->base_type) {
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_BOOL:
      for (unsigned int i = 0; i < type->vector_elements; i++) {
         c->prog_data.param[this->uniforms * 4 + i] = &values[i];
      }

      /* Set up pad elements to get things aligned to a vec4 boundary. */
      for (unsigned int i = type->vector_elements; i < 4; i++) {
         static float zero = 0;

         c->prog_data.param[this->uniforms * 4 + i] = &zero;
      }

      /* Track the size of this uniform vector, for future packing of
       * uniforms.
       */
      this->uniform_vector_size[this->uniforms] = type->vector_elements;
      this->uniforms++;

      return 1;

   case GLSL_TYPE_STRUCT:
      for (unsigned int i = 0; i < type->length; i++) {
         offset += setup_uniform_values(loc + offset,
                                        type->fields.structure[i].type);
      }
      return offset;

   case GLSL_TYPE_ARRAY:
      for (unsigned int i = 0; i < type->length; i++) {
         offset += setup_uniform_values(loc + offset, type->fields.array);
      }
      return offset;

   case GLSL_TYPE_SAMPLER:
      /* The sampler takes up a slot, but we don't use any values from it. */
      return 1;

   default:
      assert(!"not reached");
      return 0;
   }
}

void
vec4_visitor::setup_uniform_clipplane_values()
{
   gl_clip_plane *clip_planes = brw_select_clip_planes(ctx);

   /* Pre-Gen6, we compact clip planes.  For example, if the user
    * enables just clip planes 0, 1, and 3, we will enable clip planes
    * 0, 1, and 2 in the hardware, and we'll move clip plane 3 to clip
    * plane 2.  This simplifies the implementation of the pre-Gen6 clip
    * thread.
    *
    * In Gen6 and later, we don't compact clip planes, because this
    * simplifies the implementation of gl_ClipDistance.
    */
   int compacted_clipplane_index = 0;
   for (int i = 0; i < c->key.nr_userclip_plane_consts; ++i) {
      if (intel->gen < 6 &&
          !(c->key.userclip_planes_enabled_gen_4_5 & (1 << i))) {
         continue;
      }
      this->uniform_vector_size[this->uniforms] = 4;
      this->userplane[compacted_clipplane_index] = dst_reg(UNIFORM, this->uniforms);
      this->userplane[compacted_clipplane_index].type = BRW_REGISTER_TYPE_F;
      for (int j = 0; j < 4; ++j) {
         c->prog_data.param[this->uniforms * 4 + j] = &clip_planes[i][j];
      }
      ++compacted_clipplane_index;
      ++this->uniforms;
   }
}

/* Our support for builtin uniforms is even scarier than non-builtin.
 * It sits on top of the PROG_STATE_VAR parameters that are
 * automatically updated from GL context state.
 */
void
vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
{
   const ir_state_slot *const slots = ir->state_slots;
   assert(ir->state_slots != NULL);

   for (unsigned int i = 0; i < ir->num_state_slots; i++) {
      /* This state reference has already been setup by ir_to_mesa,
       * but we'll get the same index back here.  We can reference
       * ParameterValues directly, since unlike brw_fs.cpp, we never
       * add new state references during compile.
       */
      int index = _mesa_add_state_reference(this->vp->Base.Parameters,
                                            (gl_state_index *)slots[i].tokens);
      float *values = &this->vp->Base.Parameters->ParameterValues[index][0].f;

      this->uniform_vector_size[this->uniforms] = 0;
      /* Add each of the unique swizzled channels of the element.
       * This will end up matching the size of the glsl_type of this field.
       */
      int last_swiz = -1;
      for (unsigned int j = 0; j < 4; j++) {
         int swiz = GET_SWZ(slots[i].swizzle, j);
         last_swiz = swiz;

         c->prog_data.param[this->uniforms * 4 + j] = &values[swiz];
         /* Note: last_swiz was just set to swiz above, so this test always
          * passes and all four channels end up counted.
          */
         if (swiz <= last_swiz)
            this->uniform_vector_size[this->uniforms]++;
      }
      this->uniforms++;
   }
}

dst_reg *
vec4_visitor::variable_storage(ir_variable *var)
{
   return (dst_reg *)hash_table_find(this->variable_ht, var);
}

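/* Emit instructions that set the flag register according to the given
 * boolean rvalue, and report which predicate mode the caller should use
 * to test it.
 */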
void
vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir, uint32_t *predicate)
{
   ir_expression *expr = ir->as_expression();

   *predicate = BRW_PREDICATE_NORMAL;

   if (expr) {
      src_reg op[2];
      vec4_instruction *inst;

      assert(expr->get_num_operands() <= 2);
      for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
         expr->operands[i]->accept(this);
         op[i] = this->result;

         resolve_ud_negate(&op[i]);
      }

      switch (expr->operation) {
      case ir_unop_logic_not:
         inst = emit(AND(dst_null_d(), op[0], src_reg(1)));
         inst->conditional_mod = BRW_CONDITIONAL_Z;
         break;

      case ir_binop_logic_xor:
         inst = emit(XOR(dst_null_d(), op[0], op[1]));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_binop_logic_or:
         inst = emit(OR(dst_null_d(), op[0], op[1]));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_binop_logic_and:
         inst = emit(AND(dst_null_d(), op[0], op[1]));
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;

      case ir_unop_f2b:
         if (intel->gen >= 6) {
            emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
         } else {
            inst = emit(MOV(dst_null_f(), op[0]));
            inst->conditional_mod = BRW_CONDITIONAL_NZ;
         }
         break;

      case ir_unop_i2b:
         if (intel->gen >= 6) {
            emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
         } else {
            inst = emit(MOV(dst_null_d(), op[0]));
            inst->conditional_mod = BRW_CONDITIONAL_NZ;
         }
         break;

      case ir_binop_all_equal:
         inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
         *predicate = BRW_PREDICATE_ALIGN16_ALL4H;
         break;

      case ir_binop_any_nequal:
         inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
         *predicate = BRW_PREDICATE_ALIGN16_ANY4H;
         break;

      case ir_unop_any:
         inst = emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
         *predicate = BRW_PREDICATE_ALIGN16_ANY4H;
         break;

      case ir_binop_greater:
      case ir_binop_gequal:
      case ir_binop_less:
      case ir_binop_lequal:
      case ir_binop_equal:
      case ir_binop_nequal:
         emit(CMP(dst_null_d(), op[0], op[1],
                  brw_conditional_for_comparison(expr->operation)));
         break;

      default:
         assert(!"not reached");
         break;
      }
      return;
   }

   ir->accept(this);

   resolve_ud_negate(&this->result);

   if (intel->gen >= 6) {
      vec4_instruction *inst = emit(AND(dst_null_d(),
                                        this->result, src_reg(1)));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
   } else {
      vec4_instruction *inst = emit(MOV(dst_null_d(), this->result));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
   }
}

/**
 * Emit a gen6 IF statement with the comparison folded into the IF
 * instruction.
 */
void
vec4_visitor::emit_if_gen6(ir_if *ir)
{
   ir_expression *expr = ir->condition->as_expression();

   if (expr) {
      src_reg op[2];
      dst_reg temp;

      assert(expr->get_num_operands() <= 2);
      for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
         expr->operands[i]->accept(this);
         op[i] = this->result;
      }

      switch (expr->operation) {
      case ir_unop_logic_not:
         emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_Z));
         return;

      case ir_binop_logic_xor:
         emit(IF(op[0], op[1], BRW_CONDITIONAL_NZ));
         return;

      case ir_binop_logic_or:
         temp = dst_reg(this, glsl_type::bool_type);
         emit(OR(temp, op[0], op[1]));
         emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ));
         return;

      case ir_binop_logic_and:
         temp = dst_reg(this, glsl_type::bool_type);
         emit(AND(temp, op[0], op[1]));
         emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ));
         return;

      case ir_unop_f2b:
         emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
         return;

      case ir_unop_i2b:
         emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
         return;

      case ir_binop_greater:
      case ir_binop_gequal:
      case ir_binop_less:
      case ir_binop_lequal:
      case ir_binop_equal:
      case ir_binop_nequal:
         emit(IF(op[0], op[1],
                 brw_conditional_for_comparison(expr->operation)));
         return;

      case ir_binop_all_equal:
         emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
         emit(IF(BRW_PREDICATE_ALIGN16_ALL4H));
         return;

      case ir_binop_any_nequal:
         emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
         emit(IF(BRW_PREDICATE_ALIGN16_ANY4H));
         return;

      case ir_unop_any:
         emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
         emit(IF(BRW_PREDICATE_ALIGN16_ANY4H));
         return;

      default:
         assert(!"not reached");
         emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
         return;
      }
      return;
   }

   ir->condition->accept(this);

   emit(IF(this->result, src_reg(0), BRW_CONDITIONAL_NZ));
}

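/* Allocate storage for a variable according to its storage class
 * (attribute, varying output, uniform, or temporary) and record it in
 * the variable hash table.
 */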
void
vec4_visitor::visit(ir_variable *ir)
{
   dst_reg *reg = NULL;

   if (variable_storage(ir))
      return;

   switch (ir->mode) {
   case ir_var_in:
      reg = new(mem_ctx) dst_reg(ATTR, ir->location);

      /* Do GL_FIXED rescaling for GLES2.0.  Our GL_FIXED attributes
       * come in as floating point conversions of the integer values.
       */
      for (int i = ir->location; i < ir->location + type_size(ir->type); i++) {
         if (!c->key.gl_fixed_input_size[i])
            continue;

         dst_reg dst = *reg;
         dst.writemask = (1 << c->key.gl_fixed_input_size[i]) - 1;
         emit(MUL(dst, src_reg(dst), src_reg(1.0f / 65536.0f)));
      }
      break;

   case ir_var_out:
      reg = new(mem_ctx) dst_reg(this, ir->type);

      for (int i = 0; i < type_size(ir->type); i++) {
         output_reg[ir->location + i] = *reg;
         output_reg[ir->location + i].reg_offset = i;
         output_reg[ir->location + i].type = BRW_REGISTER_TYPE_F;
         output_reg_annotation[ir->location + i] = ir->name;
      }
      break;

   case ir_var_auto:
   case ir_var_temporary:
      reg = new(mem_ctx) dst_reg(this, ir->type);
      break;

   case ir_var_uniform:
      reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms);

      /* Track how big the whole uniform variable is, in case we need to put a
       * copy of its data into pull constants for array access.
       */
      this->uniform_size[this->uniforms] = type_size(ir->type);

      if (!strncmp(ir->name, "gl_", 3)) {
         setup_builtin_uniform_values(ir);
      } else {
         setup_uniform_values(ir->location, ir->type);
      }
      break;

   default:
      assert(!"not reached");
   }

   reg->type = brw_type_for_base_type(ir->type);
   hash_table_insert(this->variable_ht, reg, ir);
}

void
vec4_visitor::visit(ir_loop *ir)
{
   dst_reg counter;

   /* We don't want debugging output to print the whole body of the
    * loop as the annotation.
    */
   this->base_ir = NULL;

   if (ir->counter != NULL) {
      this->base_ir = ir->counter;
      ir->counter->accept(this);
      counter = *(variable_storage(ir->counter));

      if (ir->from != NULL) {
         this->base_ir = ir->from;
         ir->from->accept(this);

         emit(MOV(counter, this->result));
      }
   }

   emit(BRW_OPCODE_DO);

   if (ir->to) {
      this->base_ir = ir->to;
      ir->to->accept(this);

      emit(CMP(dst_null_d(), src_reg(counter), this->result,
               brw_conditional_for_comparison(ir->cmp)));

      vec4_instruction *inst = emit(BRW_OPCODE_BREAK);
      inst->predicate = BRW_PREDICATE_NORMAL;
   }

   visit_instructions(&ir->body_instructions);

   if (ir->increment) {
      this->base_ir = ir->increment;
      ir->increment->accept(this);
      emit(ADD(counter, src_reg(counter), this->result));
   }

   emit(BRW_OPCODE_WHILE);
}

void
vec4_visitor::visit(ir_loop_jump *ir)
{
   switch (ir->mode) {
   case ir_loop_jump::jump_break:
      emit(BRW_OPCODE_BREAK);
      break;
   case ir_loop_jump::jump_continue:
      emit(BRW_OPCODE_CONTINUE);
      break;
   }
}

void
vec4_visitor::visit(ir_function_signature *ir)
{
   assert(0);
   (void)ir;
}

void
vec4_visitor::visit(ir_function *ir)
{
   /* Ignore function bodies other than main() -- we shouldn't see calls to
    * them since they should all be inlined.
    */
   if (strcmp(ir->name, "main") == 0) {
      const ir_function_signature *sig;
      exec_list empty;

      sig = ir->matching_signature(&empty);

      assert(sig);

      visit_instructions(&sig->body);
   }
}

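/* Try to match a saturate -- a clamp of a value to [0.0, 1.0] -- and
 * emit it as a single MOV with the saturate modifier set.  Returns
 * whether the pattern was matched.
 */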
bool
vec4_visitor::try_emit_sat(ir_expression *ir)
{
   ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
   if (!sat_src)
      return false;

   sat_src->accept(this);
   src_reg src = this->result;

   this->result = src_reg(this, ir->type);
   vec4_instruction *inst;
   inst = emit(MOV(dst_reg(this->result), src));
   inst->saturate = true;

   return true;
}

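/* Emit a comparison between two sources, leaving a 0/1 boolean value in
 * the destination.
 */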
void
vec4_visitor::emit_bool_comparison(unsigned int op,
                                   dst_reg dst, src_reg src0, src_reg src1)
{
   /* original gen4 does destination conversion before comparison. */
   if (intel->gen < 5)
      dst.type = src0.type;

   emit(CMP(dst, src0, src1, brw_conditional_for_comparison(op)));

   dst.type = BRW_REGISTER_TYPE_D;
   emit(AND(dst, src_reg(dst), src_reg(0x1)));
}

void
vec4_visitor::visit(ir_expression *ir)
{
   unsigned int operand;
   src_reg op[Elements(ir->operands)];
   src_reg result_src;
   dst_reg result_dst;
   vec4_instruction *inst;

   if (try_emit_sat(ir))
      return;

   for (operand = 0; operand < ir->get_num_operands(); operand++) {
      this->result.file = BAD_FILE;
      ir->operands[operand]->accept(this);
      if (this->result.file == BAD_FILE) {
         printf("Failed to get tree for expression operand:\n");
         ir->operands[operand]->print();
         exit(1);
      }
      op[operand] = this->result;

      /* Matrix expression operands should have been broken down to vector
       * operations already.
       */
      assert(!ir->operands[operand]->type->is_matrix());
   }

   int vector_elements = ir->operands[0]->type->vector_elements;
   if (ir->operands[1]) {
      vector_elements = MAX2(vector_elements,
                             ir->operands[1]->type->vector_elements);
   }

   this->result.file = BAD_FILE;

   /* Storage for our result.  Ideally for an assignment we'd be using
    * the actual storage for the result here, instead.
    */
   result_src = src_reg(this, ir->type);
   /* convenience for the emit functions below. */
   result_dst = dst_reg(result_src);
   /* If nothing special happens, this is the result. */
   this->result = result_src;
   /* Limit writes to the channels that will be used by result_src later.
    * This does limit this temp's use as a temporary for multi-instruction
    * sequences.
    */
   result_dst.writemask = (1 << ir->type->vector_elements) - 1;

   switch (ir->operation) {
   case ir_unop_logic_not:
      /* Note that BRW_OPCODE_NOT is not appropriate here, since it is
       * the ones complement of the whole register, not just bit 0.
       */
      emit(XOR(result_dst, op[0], src_reg(1)));
      break;
   case ir_unop_neg:
      op[0].negate = !op[0].negate;
      this->result = op[0];
      break;
   case ir_unop_abs:
      op[0].abs = true;
      op[0].negate = false;
      this->result = op[0];
      break;

   case ir_unop_sign:
      emit(MOV(result_dst, src_reg(0.0f)));

      emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_G));
      inst = emit(MOV(result_dst, src_reg(1.0f)));
      inst->predicate = BRW_PREDICATE_NORMAL;

      emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_L));
      inst = emit(MOV(result_dst, src_reg(-1.0f)));
      inst->predicate = BRW_PREDICATE_NORMAL;

      break;

   case ir_unop_rcp:
      emit_math(SHADER_OPCODE_RCP, result_dst, op[0]);
      break;

   case ir_unop_exp2:
      emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]);
      break;
   case ir_unop_log2:
      emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]);
      break;
   case ir_unop_exp:
   case ir_unop_log:
      assert(!"not reached: should be handled by ir_explog_to_explog2");
      break;
   case ir_unop_sin:
   case ir_unop_sin_reduced:
      emit_math(SHADER_OPCODE_SIN, result_dst, op[0]);
      break;
   case ir_unop_cos:
   case ir_unop_cos_reduced:
      emit_math(SHADER_OPCODE_COS, result_dst, op[0]);
      break;

   case ir_unop_dFdx:
   case ir_unop_dFdy:
      assert(!"derivatives not valid in vertex shader");
      break;

   case ir_unop_noise:
      assert(!"not reached: should be handled by lower_noise");
      break;

   case ir_binop_add:
      emit(ADD(result_dst, op[0], op[1]));
      break;
   case ir_binop_sub:
      assert(!"not reached: should be handled by ir_sub_to_add_neg");
      break;

   case ir_binop_mul:
      if (ir->type->is_integer()) {
         /* For integer multiplication, the MUL uses the low 16 bits
          * of one of the operands (src0 on gen6, src1 on gen7).  The
          * MACH accumulates in the contribution of the upper 16 bits
          * of that operand.
          *
          * FINISHME: Emit just the MUL if we know an operand is small
          * enough.
          */
         struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D);

         emit(MUL(acc, op[0], op[1]));
         emit(MACH(dst_null_d(), op[0], op[1]));
         emit(MOV(result_dst, src_reg(acc)));
      } else {
         emit(MUL(result_dst, op[0], op[1]));
      }
      break;
   case ir_binop_div:
      /* Floating point should be lowered by DIV_TO_MUL_RCP in the compiler. */
      assert(ir->type->is_integer());
      emit_math(SHADER_OPCODE_INT_QUOTIENT, result_dst, op[0], op[1]);
      break;
   case ir_binop_mod:
      /* Floating point should be lowered by MOD_TO_FRACT in the compiler. */
      assert(ir->type->is_integer());
      emit_math(SHADER_OPCODE_INT_REMAINDER, result_dst, op[0], op[1]);
      break;

   case ir_binop_less:
   case ir_binop_greater:
   case ir_binop_lequal:
   case ir_binop_gequal:
   case ir_binop_equal:
   case ir_binop_nequal: {
      emit(CMP(result_dst, op[0], op[1],
               brw_conditional_for_comparison(ir->operation)));
      emit(AND(result_dst, result_src, src_reg(0x1)));
      break;
   }

   case ir_binop_all_equal:
      /* "==" operator producing a scalar boolean. */
      if (ir->operands[0]->type->is_vector() ||
          ir->operands[1]->type->is_vector()) {
         emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
         emit(MOV(result_dst, src_reg(0)));
         inst = emit(MOV(result_dst, src_reg(1)));
         inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
      } else {
         emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_Z));
         emit(AND(result_dst, result_src, src_reg(0x1)));
      }
      break;
   case ir_binop_any_nequal:
      /* "!=" operator producing a scalar boolean. */
      if (ir->operands[0]->type->is_vector() ||
          ir->operands[1]->type->is_vector()) {
         emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));

         emit(MOV(result_dst, src_reg(0)));
         inst = emit(MOV(result_dst, src_reg(1)));
         inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
      } else {
         emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_NZ));
         emit(AND(result_dst, result_src, src_reg(0x1)));
      }
      break;

   case ir_unop_any:
      emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
      emit(MOV(result_dst, src_reg(0)));

      inst = emit(MOV(result_dst, src_reg(1)));
      inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
      break;

   case ir_binop_logic_xor:
      emit(XOR(result_dst, op[0], op[1]));
      break;

   case ir_binop_logic_or:
      emit(OR(result_dst, op[0], op[1]));
      break;

   case ir_binop_logic_and:
      emit(AND(result_dst, op[0], op[1]));
      break;

   case ir_binop_dot:
      assert(ir->operands[0]->type->is_vector());
      assert(ir->operands[0]->type == ir->operands[1]->type);
      emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements);
      break;

   case ir_unop_sqrt:
      emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]);
      break;
   case ir_unop_rsq:
      emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]);
      break;
   case ir_unop_i2f:
   case ir_unop_i2u:
   case ir_unop_u2i:
   case ir_unop_u2f:
   case ir_unop_b2f:
   case ir_unop_b2i:
   case ir_unop_f2i:
      emit(MOV(result_dst, op[0]));
      break;
   case ir_unop_f2b:
   case ir_unop_i2b: {
      emit(CMP(result_dst, op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
      emit(AND(result_dst, result_src, src_reg(1)));
      break;
   }

   case ir_unop_trunc:
      emit(RNDZ(result_dst, op[0]));
      break;
   case ir_unop_ceil:
      op[0].negate = !op[0].negate;
      inst = emit(RNDD(result_dst, op[0]));
      this->result.negate = true;
      break;
   case ir_unop_floor:
      inst = emit(RNDD(result_dst, op[0]));
      break;
   case ir_unop_fract:
      inst = emit(FRC(result_dst, op[0]));
      break;
   case ir_unop_round_even:
      emit(RNDE(result_dst, op[0]));
      break;

   case ir_binop_min:
      emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_L));

      inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
      inst->predicate = BRW_PREDICATE_NORMAL;
      break;
   case ir_binop_max:
      emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_G));

      inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
      inst->predicate = BRW_PREDICATE_NORMAL;
      break;

   case ir_binop_pow:
      emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]);
      break;

   case ir_unop_bit_not:
      inst = emit(NOT(result_dst, op[0]));
      break;
   case ir_binop_bit_and:
      inst = emit(AND(result_dst, op[0], op[1]));
      break;
   case ir_binop_bit_xor:
      inst = emit(XOR(result_dst, op[0], op[1]));
      break;
   case ir_binop_bit_or:
      inst = emit(OR(result_dst, op[0], op[1]));
      break;

   case ir_binop_lshift:
      inst = emit(BRW_OPCODE_SHL, result_dst, op[0], op[1]);
      break;

   case ir_binop_rshift:
      if (ir->type->base_type == GLSL_TYPE_INT)
         inst = emit(BRW_OPCODE_ASR, result_dst, op[0], op[1]);
      else
         inst = emit(BRW_OPCODE_SHR, result_dst, op[0], op[1]);
      break;

   case ir_quadop_vector:
      assert(!"not reached: should be handled by lower_quadop_vector");
      break;
   }
}

void
vec4_visitor::visit(ir_swizzle *ir)
{
   src_reg src;
   int i = 0;
   int swizzle[4];

   /* Note that this is only swizzles in expressions, not those on the left
    * hand side of an assignment, which do write masking.  See ir_assignment
    * for that.
    */

   ir->val->accept(this);
   src = this->result;
   assert(src.file != BAD_FILE);

   for (i = 0; i < ir->type->vector_elements; i++) {
      switch (i) {
      case 0:
         swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x);
         break;
      case 1:
         swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.y);
         break;
      case 2:
         swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.z);
         break;
      case 3:
         swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.w);
         break;
      }
   }
   for (; i < 4; i++) {
      /* Replicate the last channel out. */
      swizzle[i] = swizzle[ir->type->vector_elements - 1];
   }

   src.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);

   this->result = src;
}

void
|
|
|
|
vec4_visitor::visit(ir_dereference_variable *ir)
|
|
|
|
{
|
2011-08-06 03:29:41 +01:00
|
|
|
const struct glsl_type *type = ir->type;
|
2011-05-02 17:45:40 +01:00
|
|
|
dst_reg *reg = variable_storage(ir->var);
|
|
|
|
|
|
|
|
if (!reg) {
|
|
|
|
fail("Failed to find variable storage for %s\n", ir->var->name);
|
|
|
|
this->result = src_reg(brw_null_reg());
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
this->result = src_reg(*reg);
|
2011-08-06 03:29:41 +01:00
|
|
|
|
|
|
|
if (type->is_scalar() || type->is_vector() || type->is_matrix())
|
|
|
|
this->result.swizzle = swizzle_for_size(type->vector_elements);
|
2011-05-02 17:45:40 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
vec4_visitor::visit(ir_dereference_array *ir)
|
|
|
|
{
|
|
|
|
ir_constant *constant_index;
|
|
|
|
src_reg src;
|
|
|
|
int element_size = type_size(ir->type);
|
|
|
|
|
|
|
|
constant_index = ir->array_index->constant_expression_value();
|
|
|
|
|
|
|
|
ir->array->accept(this);
|
|
|
|
src = this->result;
|
|
|
|
|
|
|
|
if (constant_index) {
|
|
|
|
src.reg_offset += constant_index->value.i[0] * element_size;
|
|
|
|
} else {
|
|
|
|
/* Variable index array dereference. It eats the "vec4" of the
|
|
|
|
* base of the array and an index that offsets the Mesa register
|
|
|
|
* index.
|
|
|
|
*/
|
|
|
|
ir->array_index->accept(this);
|
|
|
|
|
|
|
|
src_reg index_reg;
|
|
|
|
|
|
|
|
if (element_size == 1) {
|
|
|
|
index_reg = this->result;
|
|
|
|
} else {
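         /* The computed index counts array elements; scale it by the
          * element size in vec4 registers to get a register offset.
          */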
         index_reg = src_reg(this, glsl_type::int_type);
         emit(MUL(dst_reg(index_reg), this->result, src_reg(element_size)));
      }

      if (src.reladdr) {
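         /* A nested variable-index dereference already produced a relative
          * address; accumulate the two indices with an ADD.
          */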
         src_reg temp = src_reg(this, glsl_type::int_type);
         emit(ADD(dst_reg(temp), *src.reladdr, index_reg));
         index_reg = temp;
      }

      src.reladdr = ralloc(mem_ctx, src_reg);
      memcpy(src.reladdr, &index_reg, sizeof(index_reg));
   }

   /* If the type is smaller than a vec4, replicate the last channel out. */
   if (ir->type->is_scalar() || ir->type->is_vector())
      src.swizzle = swizzle_for_size(ir->type->vector_elements);
   else
      src.swizzle = BRW_SWIZZLE_NOOP;
   src.type = brw_type_for_base_type(ir->type);

   this->result = src;
}

void
vec4_visitor::visit(ir_dereference_record *ir)
{
   unsigned int i;
   const glsl_type *struct_type = ir->record->type;
   int offset = 0;

   ir->record->accept(this);

   for (i = 0; i < struct_type->length; i++) {
      if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
         break;
      offset += type_size(struct_type->fields.structure[i].type);
   }

   /* If the type is smaller than a vec4, replicate the last channel out. */
   if (ir->type->is_scalar() || ir->type->is_vector())
      this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
   else
      this->result.swizzle = BRW_SWIZZLE_NOOP;
   this->result.type = brw_type_for_base_type(ir->type);

   this->result.reg_offset += offset;
}

/**
 * We want to be careful in assignment setup to hit the actual storage
 * instead of potentially using a temporary like we might with the
 * ir_dereference handler.
 */
static dst_reg
get_assignment_lhs(ir_dereference *ir, vec4_visitor *v)
{
   /* The LHS must be a dereference.  If the LHS is a variable indexed array
    * access of a vector, it must be separated into a series of conditional
    * moves before reaching this point (see ir_vec_index_to_cond_assign).
    */
   assert(ir->as_dereference());
   ir_dereference_array *deref_array = ir->as_dereference_array();
   if (deref_array) {
      assert(!deref_array->array->type->is_vector());
   }

   /* Use the rvalue deref handler for the most part.  We'll ignore
    * swizzles in it and write swizzles using writemask, though.
    */
   ir->accept(v);
   return dst_reg(v->result);
}
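
/**
 * Copies a possibly-aggregate value one vec4 at a time, advancing dst and
 * src in lockstep and recursing through structs, arrays, and matrix
 * columns until it bottoms out at scalar/vector pieces.
 */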
void
vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src,
                              const struct glsl_type *type, uint32_t predicate)
{
   if (type->base_type == GLSL_TYPE_STRUCT) {
      for (unsigned int i = 0; i < type->length; i++) {
         emit_block_move(dst, src, type->fields.structure[i].type, predicate);
      }
      return;
   }

   if (type->is_array()) {
      for (unsigned int i = 0; i < type->length; i++) {
         emit_block_move(dst, src, type->fields.array, predicate);
      }
      return;
   }

   if (type->is_matrix()) {
      const struct glsl_type *vec_type;

      vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
                                         type->vector_elements, 1);

      for (int i = 0; i < type->matrix_columns; i++) {
         emit_block_move(dst, src, vec_type, predicate);
      }
      return;
   }

   assert(type->is_scalar() || type->is_vector());

   dst->type = brw_type_for_base_type(type);
   src->type = dst->type;

   dst->writemask = (1 << type->vector_elements) - 1;

   /* Do we need to worry about swizzling a swizzle? */
   assert(src->swizzle == BRW_SWIZZLE_NOOP
          || src->swizzle == swizzle_for_size(type->vector_elements));
   src->swizzle = swizzle_for_size(type->vector_elements);

   vec4_instruction *inst = emit(MOV(*dst, *src));
   inst->predicate = predicate;

   dst->reg_offset++;
   src->reg_offset++;
}

/* If the RHS processing resulted in an instruction generating a
 * temporary value, and it would be easy to rewrite the instruction to
 * generate its result right into the LHS instead, do so.  This ends
 * up reliably removing instructions where it can be tricky to do so
 * later without real UD chain information.
 */
bool
vec4_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
                                     dst_reg dst,
                                     src_reg src,
                                     vec4_instruction *pre_rhs_inst,
                                     vec4_instruction *last_rhs_inst)
{
   /* This could be supported, but it would take more smarts. */
   if (ir->condition)
      return false;

   if (pre_rhs_inst == last_rhs_inst)
      return false; /* No instructions generated to work with. */

   /* Make sure the last instruction generated our source reg. */
   if (src.file != GRF ||
       src.file != last_rhs_inst->dst.file ||
       src.reg != last_rhs_inst->dst.reg ||
       src.reg_offset != last_rhs_inst->dst.reg_offset ||
       src.reladdr ||
       src.abs ||
       src.negate ||
       last_rhs_inst->predicate != BRW_PREDICATE_NONE)
      return false;

   /* Check that the last instruction fully initialized the channels
    * we want to use, in the order we want to use them.  We could
    * potentially reswizzle the operands of many instructions so that
    * we could handle out of order channels, but don't yet.
    */
   for (unsigned i = 0; i < 4; i++) {
      if (dst.writemask & (1 << i)) {
         if (!(last_rhs_inst->dst.writemask & (1 << i)))
            return false;

         if (BRW_GET_SWZ(src.swizzle, i) != i)
            return false;
      }
   }

   /* Success!  Rewrite the instruction. */
   last_rhs_inst->dst.file = dst.file;
   last_rhs_inst->dst.reg = dst.reg;
   last_rhs_inst->dst.reg_offset = dst.reg_offset;
   last_rhs_inst->dst.reladdr = dst.reladdr;
   last_rhs_inst->dst.writemask &= dst.writemask;

   return true;
}

void
vec4_visitor::visit(ir_assignment *ir)
{
   dst_reg dst = get_assignment_lhs(ir->lhs, this);
   uint32_t predicate = BRW_PREDICATE_NONE;

   if (!ir->lhs->type->is_scalar() &&
       !ir->lhs->type->is_vector()) {
      ir->rhs->accept(this);
      src_reg src = this->result;

      if (ir->condition) {
         emit_bool_to_cond_code(ir->condition, &predicate);
      }

      emit_block_move(&dst, &src, ir->rhs->type, predicate);
      return;
   }

   /* Now we're down to just a scalar/vector with writemasks. */
   int i;

   vec4_instruction *pre_rhs_inst, *last_rhs_inst;
   pre_rhs_inst = (vec4_instruction *)this->instructions.get_tail();

   ir->rhs->accept(this);

   last_rhs_inst = (vec4_instruction *)this->instructions.get_tail();

   src_reg src = this->result;

   int swizzles[4];
   int first_enabled_chan = 0;
   int src_chan = 0;

   assert(ir->lhs->type->is_vector() ||
          ir->lhs->type->is_scalar());
   dst.writemask = ir->write_mask;
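
   /* Find the source channel that feeds the first written component;
    * unwritten components get this value below so the final swizzle
    * stays well-formed without affecting the written channels.
    */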
   for (int i = 0; i < 4; i++) {
      if (dst.writemask & (1 << i)) {
         first_enabled_chan = BRW_GET_SWZ(src.swizzle, i);
         break;
      }
   }

   /* Swizzle a small RHS vector into the channels being written.
    *
    * glsl ir treats write_mask as dictating how many channels are
    * present on the RHS while in our instructions we need to make
    * those channels appear in the slots of the vec4 they're written to.
    */
   for (int i = 0; i < 4; i++) {
      if (dst.writemask & (1 << i))
         swizzles[i] = BRW_GET_SWZ(src.swizzle, src_chan++);
      else
         swizzles[i] = first_enabled_chan;
   }
   src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
                              swizzles[2], swizzles[3]);

   if (try_rewrite_rhs_to_dst(ir, dst, src, pre_rhs_inst, last_rhs_inst)) {
      return;
   }

   if (ir->condition) {
      emit_bool_to_cond_code(ir->condition, &predicate);
   }

   for (i = 0; i < type_size(ir->lhs->type); i++) {
      vec4_instruction *inst = emit(MOV(dst, src));
      inst->predicate = predicate;

      dst.reg_offset++;
      src.reg_offset++;
   }
}

void
vec4_visitor::emit_constant_values(dst_reg *dst, ir_constant *ir)
{
   if (ir->type->base_type == GLSL_TYPE_STRUCT) {
      foreach_list(node, &ir->components) {
         ir_constant *field_value = (ir_constant *)node;

         emit_constant_values(dst, field_value);
      }
      return;
   }

   if (ir->type->is_array()) {
      for (unsigned int i = 0; i < ir->type->length; i++) {
         emit_constant_values(dst, ir->array_elements[i]);
      }
      return;
   }

   if (ir->type->is_matrix()) {
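      /* Matrix constants are stored column-major: each column is written
       * channel by channel into one vec4 register, then reg_offset
       * advances to the next column.
       */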
      for (int i = 0; i < ir->type->matrix_columns; i++) {
         for (int j = 0; j < ir->type->vector_elements; j++) {
            dst->writemask = 1 << j;
            dst->type = BRW_REGISTER_TYPE_F;

            emit(MOV(*dst,
                     src_reg(ir->value.f[i * ir->type->vector_elements + j])));
         }
         dst->reg_offset++;
      }
      return;
   }

   for (int i = 0; i < ir->type->vector_elements; i++) {
      dst->writemask = 1 << i;
      dst->type = brw_type_for_base_type(ir->type);

      switch (ir->type->base_type) {
      case GLSL_TYPE_FLOAT:
         emit(MOV(*dst, src_reg(ir->value.f[i])));
         break;
      case GLSL_TYPE_INT:
         emit(MOV(*dst, src_reg(ir->value.i[i])));
         break;
      case GLSL_TYPE_UINT:
         emit(MOV(*dst, src_reg(ir->value.u[i])));
         break;
      case GLSL_TYPE_BOOL:
         emit(MOV(*dst, src_reg(ir->value.b[i])));
         break;
      default:
         assert(!"Non-float/uint/int/bool constant");
         break;
      }
   }
   dst->reg_offset++;
}

void
vec4_visitor::visit(ir_constant *ir)
{
   dst_reg dst = dst_reg(this, ir->type);
   this->result = src_reg(dst);

   emit_constant_values(&dst, ir);
}

void
vec4_visitor::visit(ir_call *ir)
{
   assert(!"not reached");
}

void
vec4_visitor::visit(ir_texture *ir)
{
   /* FINISHME: Implement vertex texturing.
    *
    * With 0 vertex samplers available, the linker will reject
    * programs that do vertex texturing, but after our visitor has
    * run.
    */
   this->result = src_reg(this, glsl_type::vec4_type);
}

void
vec4_visitor::visit(ir_return *ir)
{
   assert(!"not reached");
}

void
vec4_visitor::visit(ir_discard *ir)
{
   assert(!"not reached");
}

void
vec4_visitor::visit(ir_if *ir)
{
   /* Don't point the annotation at the if statement, because then it plus
    * the then and else blocks get printed.
    */
   this->base_ir = ir->condition;

   if (intel->gen == 6) {
      emit_if_gen6(ir);
   } else {
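      /* Outside the gen6 path, the IF can't embed a comparison, so the
       * condition is evaluated into the flag register first and the IF
       * is predicated on it.
       */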
      uint32_t predicate;
      emit_bool_to_cond_code(ir->condition, &predicate);
      emit(IF(predicate));
   }

   visit_instructions(&ir->then_instructions);

   if (!ir->else_instructions.is_empty()) {
      this->base_ir = ir->condition;
      emit(BRW_OPCODE_ELSE);

      visit_instructions(&ir->else_instructions);
   }

   this->base_ir = ir->condition;
   emit(BRW_OPCODE_ENDIF);
}

void
vec4_visitor::emit_ndc_computation()
{
   /* Get the position */
   src_reg pos = src_reg(output_reg[VERT_RESULT_HPOS]);

   /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */
   dst_reg ndc = dst_reg(this, glsl_type::vec4_type);
   output_reg[BRW_VERT_RESULT_NDC] = ndc;

   current_annotation = "NDC";
   dst_reg ndc_w = ndc;
   ndc_w.writemask = WRITEMASK_W;
   src_reg pos_w = pos;
   pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
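
   /* RCP puts 1/w into the W channel; the MUL below then scales XYZ by
    * it to finish the perspective divide.
    */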
   emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w);

   dst_reg ndc_xyz = ndc;
   ndc_xyz.writemask = WRITEMASK_XYZ;

   emit(MUL(ndc_xyz, pos, src_reg(ndc_w)));
}

void
vec4_visitor::emit_psiz_and_flags(struct brw_reg reg)
{
   if (intel->gen < 6 &&
       ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) ||
        c->key.userclip_active || brw->has_negative_rhw_bug)) {
      dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
      dst_reg header1_w = header1;
      header1_w.writemask = WRITEMASK_W;
      GLuint i;

      emit(MOV(header1, 0u));

      if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
         src_reg psiz = src_reg(output_reg[VERT_RESULT_PSIZ]);

         current_annotation = "Point size";
         emit(MUL(header1_w, psiz, src_reg((float)(1 << 11))));
         emit(AND(header1_w, src_reg(header1_w), 0x7ff << 8));
      }

      current_annotation = "Clipping flags";
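      /* For each enabled user clip plane, DP4 computes dot(position,
       * plane) into the null register with a less-than conditional mod;
       * when the vertex is outside the plane, the predicated OR sets
       * clip flag i in the header word.
       */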
      for (i = 0; i < c->key.nr_userclip_plane_consts; i++) {
         vec4_instruction *inst;

         inst = emit(DP4(dst_null_f(), src_reg(output_reg[VERT_RESULT_HPOS]),
                         src_reg(this->userplane[i])));
         inst->conditional_mod = BRW_CONDITIONAL_L;

         inst = emit(OR(header1_w, src_reg(header1_w), 1u << i));
         inst->predicate = BRW_PREDICATE_NORMAL;
      }

      /* i965 clipping workaround:
       * 1) Test for -ve rhw
       * 2) If set,
       *      set ndc = (0,0,0,0)
       *      set ucp[6] = 1
       *
       * Later, clipping will detect ucp[6] and ensure the primitive is
       * clipped against all fixed planes.
       */
      if (brw->has_negative_rhw_bug) {
#if 0
         /* FINISHME */
         brw_CMP(p,
                 vec8(brw_null_reg()),
                 BRW_CONDITIONAL_L,
                 brw_swizzle1(output_reg[BRW_VERT_RESULT_NDC], 3),
                 brw_imm_f(0));

         brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6));
         brw_MOV(p, output_reg[BRW_VERT_RESULT_NDC], brw_imm_f(0));
         brw_set_predicate_control(p, BRW_PREDICATE_NONE);
#endif
      }

      emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), src_reg(header1)));
   } else if (intel->gen < 6) {
      emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), 0u));
   } else {
      emit(MOV(retype(reg, BRW_REGISTER_TYPE_D), src_reg(0)));
      if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
         emit(MOV(brw_writemask(reg, WRITEMASK_W),
                  src_reg(output_reg[VERT_RESULT_PSIZ])));
      }
   }
}

void
vec4_visitor::emit_clip_distances(struct brw_reg reg, int offset)
{
   if (intel->gen < 6) {
      /* Clip distance slots are set aside in gen5, but they are not used.  It
       * is not clear whether we actually need to set aside space for them,
       * but the performance cost is negligible.
       */
      return;
   }

   /* From the GLSL 1.30 spec, section 7.1 (Vertex Shader Special Variables):
    *
    *     "If a linked set of shaders forming the vertex stage contains no
    *     static write to gl_ClipVertex or gl_ClipDistance, but the
    *     application has requested clipping against user clip planes through
    *     the API, then the coordinate written to gl_Position is used for
    *     comparison against the user clip planes."
    *
    * This function is only called if the shader didn't write to
    * gl_ClipDistance.  Accordingly, we use gl_ClipVertex to perform clipping
    * if the user wrote to it; otherwise we use gl_Position.
    */
   gl_vert_result clip_vertex = VERT_RESULT_CLIP_VERTEX;
   if (!(c->prog_data.outputs_written
         & BITFIELD64_BIT(VERT_RESULT_CLIP_VERTEX))) {
      clip_vertex = VERT_RESULT_HPOS;
   }
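
   /* Each DP4 writes one channel of this clip-distance slot with the
    * signed distance of the vertex from user clip plane i + offset; a
    * slot carries up to four distances.
    */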
   for (int i = 0; i + offset < c->key.nr_userclip_plane_consts && i < 4;
        ++i) {
      emit(DP4(dst_reg(brw_writemask(reg, 1 << i)),
               src_reg(output_reg[clip_vertex]),
               src_reg(this->userplane[i + offset])));
   }
}

void
vec4_visitor::emit_generic_urb_slot(dst_reg reg, int vert_result)
{
   assert(vert_result < VERT_RESULT_MAX);
   current_annotation = output_reg_annotation[vert_result];
   /* Copy the register, saturating if necessary */
   vec4_instruction *inst = emit(MOV(reg,
                                     src_reg(output_reg[vert_result])));
   if ((vert_result == VERT_RESULT_COL0 ||
        vert_result == VERT_RESULT_COL1 ||
        vert_result == VERT_RESULT_BFC0 ||
        vert_result == VERT_RESULT_BFC1) &&
       c->key.clamp_vertex_color) {
      inst->saturate = true;
   }
}

void
vec4_visitor::emit_urb_slot(int mrf, int vert_result)
{
   struct brw_reg hw_reg = brw_message_reg(mrf);
   dst_reg reg = dst_reg(MRF, mrf);
   reg.type = BRW_REGISTER_TYPE_F;

   switch (vert_result) {
   case VERT_RESULT_PSIZ:
      /* PSIZ is always in slot 0, and is coupled with other flags. */
      current_annotation = "indices, point width, clip flags";
      emit_psiz_and_flags(hw_reg);
      break;
   case BRW_VERT_RESULT_NDC:
      current_annotation = "NDC";
      emit(MOV(reg, src_reg(output_reg[BRW_VERT_RESULT_NDC])));
      break;
   case BRW_VERT_RESULT_HPOS_DUPLICATE:
   case VERT_RESULT_HPOS:
      current_annotation = "gl_Position";
      emit(MOV(reg, src_reg(output_reg[VERT_RESULT_HPOS])));
      break;
   case VERT_RESULT_CLIP_DIST0:
   case VERT_RESULT_CLIP_DIST1:
      if (this->c->key.uses_clip_distance) {
         emit_generic_urb_slot(reg, vert_result);
      } else {
         current_annotation = "user clip distances";
         emit_clip_distances(hw_reg, (vert_result - VERT_RESULT_CLIP_DIST0) * 4);
      }
      break;
   case BRW_VERT_RESULT_PAD:
      /* No need to write to this slot */
      break;
   default:
      emit_generic_urb_slot(reg, vert_result);
      break;
   }
}

static int
align_interleaved_urb_mlen(struct brw_context *brw, int mlen)
{
   struct intel_context *intel = &brw->intel;

   if (intel->gen >= 6) {
      /* URB data written (does not include the message header reg) must
       * be a multiple of 256 bits, or 2 VS registers.  See vol5c.5,
       * section 5.4.3.2.2: URB_INTERLEAVED.
       *
       * URB entries are allocated on a multiple of 1024 bits, so an
       * extra 128 bits written here to make the end align to 256 is
       * no problem.
       */
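      /* mlen includes the one header register, so the data length is
       * mlen - 1; forcing mlen to be odd keeps the data length even.
       */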
      if ((mlen % 2) != 1)
         mlen++;
   }

   return mlen;
}

/**
 * Generates the VUE payload plus the 1 or 2 URB write instructions to
 * complete the VS thread.
 *
 * The VUE layout is documented in Volume 2a.
 */
void
vec4_visitor::emit_urb_writes()
{
   /* MRF 0 is reserved for the debugger, so start with message header
    * in MRF 1.
    */
   int base_mrf = 1;
   int mrf = base_mrf;
   /* In the process of generating our URB write message contents, we
    * may need to unspill a register or load from an array.  Those
    * reads would use MRFs 14-15.
    */
   int max_usable_mrf = 13;

   /* The following assertion verifies that max_usable_mrf causes an
    * even-numbered amount of URB write data, which will meet gen6's
    * requirements for length alignment.
    */
   assert((max_usable_mrf - base_mrf) % 2 == 0);

   /* FINISHME: edgeflag */

   brw_compute_vue_map(&c->vue_map, intel, c->key.userclip_active,
                       c->prog_data.outputs_written);

   /* First mrf is the g0-based message header containing URB handles and such,
    * which is implied in VS_OPCODE_URB_WRITE.
    */
   mrf++;

   if (intel->gen < 6) {
      emit_ndc_computation();
   }

   /* Set up the VUE data for the first URB write */
   int slot;
   for (slot = 0; slot < c->vue_map.num_slots; ++slot) {
      emit_urb_slot(mrf++, c->vue_map.slot_to_vert_result[slot]);

      /* If this was max_usable_mrf, we can't fit anything more into this URB
       * WRITE.
       */
      if (mrf > max_usable_mrf) {
         slot++;
         break;
      }
   }

   current_annotation = "URB write";
   vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
   inst->base_mrf = base_mrf;
   inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
   inst->eot = (slot >= c->vue_map.num_slots);

   /* Optional second URB write */
   if (!inst->eot) {
      mrf = base_mrf + 1;

      for (; slot < c->vue_map.num_slots; ++slot) {
         assert(mrf < max_usable_mrf);

         emit_urb_slot(mrf++, c->vue_map.slot_to_vert_result[slot]);
      }

      current_annotation = "URB write";
      inst = emit(VS_OPCODE_URB_WRITE);
      inst->base_mrf = base_mrf;
      inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
      inst->eot = true;
      /* URB destination offset.  The first write used MRFs 1-13; minus
       * the one header MRF, that's 12 regs of data.  URB offset is in
       * URB row increments, and each of our MRFs is half of one of
       * those, since we're doing interleaved writes.
       */
      inst->offset = (max_usable_mrf - base_mrf) / 2;
   }

   if (intel->gen == 6)
      c->prog_data.urb_entry_size = ALIGN(c->vue_map.num_slots, 8) / 8;
   else
      c->prog_data.urb_entry_size = ALIGN(c->vue_map.num_slots, 4) / 4;
}

src_reg
vec4_visitor::get_scratch_offset(vec4_instruction *inst,
                                 src_reg *reladdr, int reg_offset)
{
   /* Because we store the values to scratch interleaved like our
    * vertex data, we need to scale the vec4 index by 2.
    */
   int message_header_scale = 2;

   /* Pre-gen6, the message header uses byte offsets instead of vec4
    * (16-byte) offset units.
    */
   if (intel->gen < 6)
      message_header_scale *= 16;

   if (reladdr) {
      src_reg index = src_reg(this, glsl_type::int_type);

      emit_before(inst, ADD(dst_reg(index), *reladdr, src_reg(reg_offset)));
      emit_before(inst, MUL(dst_reg(index),
                            index, src_reg(message_header_scale)));

      return index;
   } else {
      return src_reg(reg_offset * message_header_scale);
   }
}

src_reg
vec4_visitor::get_pull_constant_offset(vec4_instruction *inst,
                                       src_reg *reladdr, int reg_offset)
{
   if (reladdr) {
      src_reg index = src_reg(this, glsl_type::int_type);

      emit_before(inst, ADD(dst_reg(index), *reladdr, src_reg(reg_offset)));

      /* Pre-gen6, the message header uses byte offsets instead of vec4
       * (16-byte) offset units.
       */
      if (intel->gen < 6) {
         emit_before(inst, MUL(dst_reg(index), index, src_reg(16)));
      }

      return index;
   } else {
      int message_header_scale = intel->gen < 6 ? 16 : 1;
      return src_reg(reg_offset * message_header_scale);
   }
}

/**
 * Emits an instruction before @inst to load the value named by @orig_src
 * from scratch space at @base_offset to @temp.
 */
void
vec4_visitor::emit_scratch_read(vec4_instruction *inst,
                                dst_reg temp, src_reg orig_src,
                                int base_offset)
{
   int reg_offset = base_offset + orig_src.reg_offset;
   src_reg index = get_scratch_offset(inst, orig_src.reladdr, reg_offset);

   emit_before(inst, SCRATCH_READ(temp, index));
}

/**
 * Emits an instruction after @inst to store the value to be written
 * to @orig_dst to scratch space at @base_offset, from @temp.
 */
void
vec4_visitor::emit_scratch_write(vec4_instruction *inst,
                                 src_reg temp, dst_reg orig_dst,
                                 int base_offset)
{
   int reg_offset = base_offset + orig_dst.reg_offset;
   src_reg index = get_scratch_offset(inst, orig_dst.reladdr, reg_offset);

   dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0),
                                       orig_dst.writemask));
   vec4_instruction *write = SCRATCH_WRITE(dst, temp, index);
   write->predicate = inst->predicate;
   write->ir = inst->ir;
   write->annotation = inst->annotation;
   inst->insert_after(write);
}

/**
 * We can't generally support array access in GRF space, because a
 * single instruction's destination can only span 2 contiguous
 * registers.  So, we send all GRF arrays that get variable index
 * access to scratch space.
 */
void
vec4_visitor::move_grf_array_access_to_scratch()
{
   int scratch_loc[this->virtual_grf_count];

   for (int i = 0; i < this->virtual_grf_count; i++) {
      scratch_loc[i] = -1;
   }

   /* First, calculate the set of virtual GRFs that need to be punted
    * to scratch due to having any array access on them, and where in
    * scratch.
    */
   foreach_list(node, &this->instructions) {
      vec4_instruction *inst = (vec4_instruction *)node;

      if (inst->dst.file == GRF && inst->dst.reladdr &&
          scratch_loc[inst->dst.reg] == -1) {
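         /* Reserve 8 floats * 4 bytes of scratch per vec4: the thread's
          * two interleaved vertices each store one vec4 there.
          */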
         scratch_loc[inst->dst.reg] = c->last_scratch;
         c->last_scratch += this->virtual_grf_sizes[inst->dst.reg] * 8 * 4;
      }

      for (int i = 0; i < 3; i++) {
         src_reg *src = &inst->src[i];

         if (src->file == GRF && src->reladdr &&
             scratch_loc[src->reg] == -1) {
            scratch_loc[src->reg] = c->last_scratch;
            c->last_scratch += this->virtual_grf_sizes[src->reg] * 8 * 4;
         }
      }
   }

   /* Now, for anything that will be accessed through scratch, rewrite
    * it to load/store.  Note that this is a _safe list walk, because
    * we may generate a new scratch_write instruction after the one
    * we're processing.
    */
   foreach_list_safe(node, &this->instructions) {
      vec4_instruction *inst = (vec4_instruction *)node;

      /* Set up the annotation tracking for new generated instructions. */
      base_ir = inst->ir;
      current_annotation = inst->annotation;

      if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1) {
         src_reg temp = src_reg(this, glsl_type::vec4_type);

         emit_scratch_write(inst, temp, inst->dst, scratch_loc[inst->dst.reg]);

         inst->dst.file = temp.file;
         inst->dst.reg = temp.reg;
         inst->dst.reg_offset = temp.reg_offset;
         inst->dst.reladdr = NULL;
      }

      for (int i = 0; i < 3; i++) {
         if (inst->src[i].file != GRF || scratch_loc[inst->src[i].reg] == -1)
            continue;

         dst_reg temp = dst_reg(this, glsl_type::vec4_type);

         emit_scratch_read(inst, temp, inst->src[i],
                           scratch_loc[inst->src[i].reg]);

         inst->src[i].file = temp.file;
         inst->src[i].reg = temp.reg;
         inst->src[i].reg_offset = temp.reg_offset;
         inst->src[i].reladdr = NULL;
      }
   }
}

/**
 * Emits an instruction before @inst to load the value named by @orig_src
 * from the pull constant buffer (surface) at @base_offset to @temp.
 */
void
vec4_visitor::emit_pull_constant_load(vec4_instruction *inst,
                                      dst_reg temp, src_reg orig_src,
                                      int base_offset)
{
   int reg_offset = base_offset + orig_src.reg_offset;
   src_reg index = get_pull_constant_offset(inst, orig_src.reladdr, reg_offset);
   vec4_instruction *load;

   load = new(mem_ctx) vec4_instruction(this, VS_OPCODE_PULL_CONSTANT_LOAD,
                                        temp, index);
   load->base_mrf = 14;
   load->mlen = 1;
   emit_before(inst, load);
}

/**
 * Implements array access of uniforms by inserting a
 * PULL_CONSTANT_LOAD instruction.
 *
 * Unlike temporary GRF array access (where we don't support it due to
 * the difficulty of doing relative addressing on instruction
 * destinations), we could potentially do array access of uniforms
 * that were loaded in GRF space as push constants.  In real-world
 * usage we've seen, though, the arrays being used are always larger
 * than we could load as push constants, so just always move all
 * uniform array access out to a pull constant buffer.
 */
void
vec4_visitor::move_uniform_array_access_to_pull_constants()
{
   int pull_constant_loc[this->uniforms];

   for (int i = 0; i < this->uniforms; i++) {
      pull_constant_loc[i] = -1;
   }

   /* Walk through and find array access of uniforms.  Put a copy of that
    * uniform in the pull constant buffer.
    *
    * Note that we don't move constant-indexed accesses to arrays.  No
    * testing has been done of the performance impact of this choice.
    */
   foreach_list_safe(node, &this->instructions) {
      vec4_instruction *inst = (vec4_instruction *)node;

      for (int i = 0; i < 3; i++) {
         if (inst->src[i].file != UNIFORM || !inst->src[i].reladdr)
            continue;

         int uniform = inst->src[i].reg;

         /* If this array isn't already present in the pull constant buffer,
          * add it.
          */
         if (pull_constant_loc[uniform] == -1) {
            const float **values = &prog_data->param[uniform * 4];

            pull_constant_loc[uniform] = prog_data->nr_pull_params / 4;

            for (int j = 0; j < uniform_size[uniform] * 4; j++) {
               prog_data->pull_param[prog_data->nr_pull_params++] = values[j];
            }
         }

         /* Set up the annotation tracking for new generated instructions. */
         base_ir = inst->ir;
         current_annotation = inst->annotation;

         dst_reg temp = dst_reg(this, glsl_type::vec4_type);

         emit_pull_constant_load(inst, temp, inst->src[i],
                                 pull_constant_loc[uniform]);

         inst->src[i].file = temp.file;
         inst->src[i].reg = temp.reg;
         inst->src[i].reg_offset = temp.reg_offset;
         inst->src[i].reladdr = NULL;
      }
   }

   /* Now there are no accesses of the UNIFORM file with a reladdr, so
    * no need to track them as larger-than-vec4 objects.  This will be
    * relied on in cutting out unused uniform vectors from push
    * constants.
    */
   split_uniform_registers();
}
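
/**
 * Negation of an unsigned (UD) source doesn't produce the value the
 * comparisons downstream expect, so flush the negation through an
 * explicit MOV into a temporary and use that instead.
 */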
void
vec4_visitor::resolve_ud_negate(src_reg *reg)
{
   if (reg->type != BRW_REGISTER_TYPE_UD ||
       !reg->negate)
      return;

   src_reg temp = src_reg(this, glsl_type::uvec4_type);
   emit(BRW_OPCODE_MOV, dst_reg(temp), *reg);
   *reg = temp;
}

vec4_visitor::vec4_visitor(struct brw_vs_compile *c,
                           struct gl_shader_program *prog,
                           struct brw_shader *shader)
{
   this->c = c;
   this->p = &c->func;
   this->brw = p->brw;
   this->intel = &brw->intel;
   this->ctx = &intel->ctx;
   this->prog = prog;
   this->shader = shader;

   this->mem_ctx = ralloc_context(NULL);
   this->failed = false;

   this->base_ir = NULL;
   this->current_annotation = NULL;

   this->vp = (struct gl_vertex_program *)
      prog->_LinkedShaders[MESA_SHADER_VERTEX]->Program;
   this->prog_data = &c->prog_data;

   this->variable_ht = hash_table_ctor(0,
                                       hash_table_pointer_hash,
                                       hash_table_pointer_compare);

   this->virtual_grf_def = NULL;
   this->virtual_grf_use = NULL;
   this->virtual_grf_sizes = NULL;
   this->virtual_grf_count = 0;
   this->virtual_grf_reg_map = NULL;
   this->virtual_grf_reg_count = 0;
   this->virtual_grf_array_size = 0;
   this->live_intervals_valid = false;

   this->uniforms = 0;
}

vec4_visitor::~vec4_visitor()
{
   ralloc_free(this->mem_ctx);
   hash_table_dtor(this->variable_ht);
}


void
vec4_visitor::fail(const char *format, ...)
{
   va_list va;
   char *msg;

   if (failed)
      return;

   failed = true;

   va_start(va, format);
   msg = ralloc_vasprintf(mem_ctx, format, va);
   va_end(va);
   msg = ralloc_asprintf(mem_ctx, "VS compile failed: %s\n", msg);

   this->fail_msg = msg;

   if (INTEL_DEBUG & DEBUG_VS) {
      fprintf(stderr, "%s", msg);
   }
}

} /* namespace brw */