i965: Replace brw_wm_* with dumping code into the fs_visitor.
This makes a giant pile of code newly dead. It also fixes TXB on newer chipsets, which has been totally broken (I now have a piglit test for that). It passes the same set of Ian's ARB_fragment_program tests. It also improves high-settings ETQW performance by 3.2 +/- 1.9% (n=3), thanks to better optimization and having 8-wide along with 16-wide shaders. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=24355 Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
parent
014aaa97d3
commit
97615b2d8c
|
@ -119,6 +119,7 @@ i965_CXX_FILES = \
|
|||
brw_fs_cse.cpp \
|
||||
brw_fs_copy_propagation.cpp \
|
||||
brw_fs_emit.cpp \
|
||||
brw_fs_fp.cpp \
|
||||
brw_fs_live_variables.cpp \
|
||||
brw_fs_visitor.cpp \
|
||||
brw_fs_channel_expressions.cpp \
|
||||
|
|
|
@ -1999,11 +1999,15 @@ fs_visitor::run()
|
|||
/* Generate FS IR for main(). (the visitor only descends into
|
||||
* functions called "main").
|
||||
*/
|
||||
foreach_list(node, &*shader->ir) {
|
||||
ir_instruction *ir = (ir_instruction *)node;
|
||||
base_ir = ir;
|
||||
this->result = reg_undef;
|
||||
ir->accept(this);
|
||||
if (shader) {
|
||||
foreach_list(node, &*shader->ir) {
|
||||
ir_instruction *ir = (ir_instruction *)node;
|
||||
base_ir = ir;
|
||||
this->result = reg_undef;
|
||||
ir->accept(this);
|
||||
}
|
||||
} else {
|
||||
emit_fragment_program_code();
|
||||
}
|
||||
if (failed)
|
||||
return false;
|
||||
|
@ -2084,24 +2088,26 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c,
|
|||
bool start_busy = false;
|
||||
float start_time = 0;
|
||||
|
||||
if (!prog)
|
||||
return false;
|
||||
|
||||
if (unlikely(INTEL_DEBUG & DEBUG_PERF)) {
|
||||
start_busy = (intel->batch.last_bo &&
|
||||
drm_intel_bo_busy(intel->batch.last_bo));
|
||||
start_time = get_time();
|
||||
}
|
||||
|
||||
struct brw_shader *shader =
|
||||
(brw_shader *) prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
|
||||
if (!shader)
|
||||
return false;
|
||||
struct brw_shader *shader = NULL;
|
||||
if (prog)
|
||||
shader = (brw_shader *) prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
|
||||
|
||||
if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
|
||||
printf("GLSL IR for native fragment shader %d:\n", prog->Name);
|
||||
_mesa_print_ir(shader->ir, NULL);
|
||||
printf("\n\n");
|
||||
if (shader) {
|
||||
printf("GLSL IR for native fragment shader %d:\n", prog->Name);
|
||||
_mesa_print_ir(shader->ir, NULL);
|
||||
printf("\n\n");
|
||||
} else {
|
||||
printf("ARB_fragment_program %d ir for native fragment shader\n",
|
||||
c->fp->program.Base.Id);
|
||||
_mesa_print_program(&c->fp->program.Base);
|
||||
}
|
||||
}
|
||||
|
||||
/* Now the main event: Visit the shader IR and generate our FS IR for it.
|
||||
|
|
|
@ -177,7 +177,7 @@ public:
|
|||
/** @{
|
||||
* Annotation for the generated IR. One of the two can be set.
|
||||
*/
|
||||
ir_instruction *ir;
|
||||
const void *ir;
|
||||
const char *annotation;
|
||||
/** @} */
|
||||
};
|
||||
|
@ -324,6 +324,29 @@ public:
|
|||
void emit_if_gen6(ir_if *ir);
|
||||
void emit_unspill(fs_inst *inst, fs_reg reg, uint32_t spill_offset);
|
||||
|
||||
void emit_fragment_program_code();
|
||||
void setup_fp_regs();
|
||||
fs_reg get_fp_src_reg(const prog_src_register *src);
|
||||
fs_reg get_fp_dst_reg(const prog_dst_register *dst);
|
||||
void emit_fp_alu1(enum opcode opcode,
|
||||
const struct prog_instruction *fpi,
|
||||
fs_reg dst, fs_reg src);
|
||||
void emit_fp_alu2(enum opcode opcode,
|
||||
const struct prog_instruction *fpi,
|
||||
fs_reg dst, fs_reg src0, fs_reg src1);
|
||||
void emit_fp_scalar_write(const struct prog_instruction *fpi,
|
||||
fs_reg dst, fs_reg src);
|
||||
void emit_fp_scalar_math(enum opcode opcode,
|
||||
const struct prog_instruction *fpi,
|
||||
fs_reg dst, fs_reg src);
|
||||
|
||||
void emit_fp_minmax(const struct prog_instruction *fpi,
|
||||
fs_reg dst, fs_reg src0, fs_reg src1);
|
||||
|
||||
void emit_fp_sop(uint32_t conditional_mod,
|
||||
const struct prog_instruction *fpi,
|
||||
fs_reg dst, fs_reg src0, fs_reg src1, fs_reg one);
|
||||
|
||||
void emit_color_write(int target, int index, int first_color_mrf);
|
||||
void emit_fb_writes();
|
||||
bool try_rewrite_rhs_to_dst(ir_assignment *ir,
|
||||
|
@ -381,9 +404,12 @@ public:
|
|||
int max_grf;
|
||||
int urb_setup[FRAG_ATTRIB_MAX];
|
||||
|
||||
fs_reg *fp_temp_regs;
|
||||
fs_reg *fp_input_regs;
|
||||
|
||||
/** @{ debug annotation info */
|
||||
const char *current_annotation;
|
||||
ir_instruction *base_ir;
|
||||
const void *base_ir;
|
||||
/** @} */
|
||||
|
||||
bool failed;
|
||||
|
|
|
@ -726,11 +726,16 @@ fs_visitor::generate_code()
|
|||
{
|
||||
int last_native_insn_offset = p->next_insn_offset;
|
||||
const char *last_annotation_string = NULL;
|
||||
ir_instruction *last_annotation_ir = NULL;
|
||||
const void *last_annotation_ir = NULL;
|
||||
|
||||
if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
|
||||
printf("Native code for fragment shader %d (%d-wide dispatch):\n",
|
||||
prog->Name, c->dispatch_width);
|
||||
if (shader) {
|
||||
printf("Native code for fragment shader %d (%d-wide dispatch):\n",
|
||||
prog->Name, c->dispatch_width);
|
||||
} else {
|
||||
printf("Native code for fragment program %d (%d-wide dispatch):\n",
|
||||
c->fp->program.Base.Id, c->dispatch_width);
|
||||
}
|
||||
}
|
||||
|
||||
fs_cfg *cfg = NULL;
|
||||
|
@ -762,7 +767,16 @@ fs_visitor::generate_code()
|
|||
last_annotation_ir = inst->ir;
|
||||
if (last_annotation_ir) {
|
||||
printf(" ");
|
||||
last_annotation_ir->print();
|
||||
if (shader)
|
||||
((ir_instruction *)inst->ir)->print();
|
||||
else {
|
||||
const prog_instruction *fpi;
|
||||
fpi = (const prog_instruction *)inst->ir;
|
||||
printf("%d: ", (int)(fpi - fp->Base.Instructions));
|
||||
_mesa_fprint_instruction_opt(stdout,
|
||||
fpi,
|
||||
0, PROG_PRINT_DEBUG, NULL);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,784 @@
|
|||
/*
|
||||
* Copyright © 2012 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/** @file brw_fs_fp.cpp
|
||||
*
|
||||
* Implementation of the compiler for GL_ARB_fragment_program shaders on top
|
||||
* of the GLSL compiler backend.
|
||||
*/
|
||||
|
||||
#include "brw_context.h"
|
||||
#include "brw_fs.h"
|
||||
|
||||
/**
 * Returns a copy of \p reg whose reg_offset has been advanced by \p i.
 *
 * The argument is taken by value, so the caller's register is untouched.
 */
static fs_reg
regoffset(fs_reg reg, int i)
{
   fs_reg shifted = reg;

   shifted.reg_offset += i;
   return shifted;
}
|
||||
|
||||
void
|
||||
fs_visitor::emit_fp_alu1(enum opcode opcode,
|
||||
const struct prog_instruction *fpi,
|
||||
fs_reg dst, fs_reg src)
|
||||
{
|
||||
for (int i = 0; i < 4; i++) {
|
||||
if (fpi->DstReg.WriteMask & (1 << i))
|
||||
emit(opcode, regoffset(dst, i), regoffset(src, i));
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
fs_visitor::emit_fp_alu2(enum opcode opcode,
|
||||
const struct prog_instruction *fpi,
|
||||
fs_reg dst, fs_reg src0, fs_reg src1)
|
||||
{
|
||||
for (int i = 0; i < 4; i++) {
|
||||
if (fpi->DstReg.WriteMask & (1 << i))
|
||||
emit(opcode, regoffset(dst, i),
|
||||
regoffset(src0, i), regoffset(src1, i));
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
fs_visitor::emit_fp_minmax(const prog_instruction *fpi,
|
||||
fs_reg dst, fs_reg src0, fs_reg src1)
|
||||
{
|
||||
uint32_t conditionalmod;
|
||||
if (fpi->Opcode == OPCODE_MIN)
|
||||
conditionalmod = BRW_CONDITIONAL_L;
|
||||
else
|
||||
conditionalmod = BRW_CONDITIONAL_GE;
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
if (fpi->DstReg.WriteMask & (1 << i)) {
|
||||
emit_minmax(conditionalmod, regoffset(dst, i),
|
||||
regoffset(src0, i), regoffset(src1, i));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
fs_visitor::emit_fp_sop(uint32_t conditional_mod,
|
||||
const struct prog_instruction *fpi,
|
||||
fs_reg dst, fs_reg src0, fs_reg src1,
|
||||
fs_reg one)
|
||||
{
|
||||
for (int i = 0; i < 4; i++) {
|
||||
if (fpi->DstReg.WriteMask & (1 << i)) {
|
||||
fs_inst *inst;
|
||||
|
||||
inst = emit(BRW_OPCODE_CMP, fs_reg(brw_null_reg()),
|
||||
regoffset(src0, i), regoffset(src1, i));
|
||||
inst->conditional_mod = conditional_mod;
|
||||
|
||||
inst = emit(BRW_OPCODE_SEL, regoffset(dst, i), one, fs_reg(0.0f));
|
||||
inst->predicated = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
fs_visitor::emit_fp_scalar_write(const struct prog_instruction *fpi,
|
||||
fs_reg dst, fs_reg src)
|
||||
{
|
||||
for (int i = 0; i < 4; i++) {
|
||||
if (fpi->DstReg.WriteMask & (1 << i))
|
||||
emit(BRW_OPCODE_MOV, regoffset(dst, i), src);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
fs_visitor::emit_fp_scalar_math(enum opcode opcode,
|
||||
const struct prog_instruction *fpi,
|
||||
fs_reg dst, fs_reg src)
|
||||
{
|
||||
fs_reg temp = fs_reg(this, glsl_type::float_type);
|
||||
emit_math(opcode, temp, src);
|
||||
emit_fp_scalar_write(fpi, dst, temp);
|
||||
}
|
||||
|
||||
void
|
||||
fs_visitor::emit_fragment_program_code()
|
||||
{
|
||||
setup_fp_regs();
|
||||
|
||||
fs_reg null = fs_reg(brw_null_reg());
|
||||
|
||||
/* Keep a reg with 1.0 around, for reuse by emit_fp_sop so that it can just
|
||||
* be:
|
||||
*
|
||||
* sel.f0 dst 1.0 0.0
|
||||
*
|
||||
* instead of
|
||||
*
|
||||
* mov dst 0.0
|
||||
* mov.f0 dst 1.0
|
||||
*/
|
||||
fs_reg one = fs_reg(this, glsl_type::float_type);
|
||||
emit(BRW_OPCODE_MOV, one, fs_reg(1.0f));
|
||||
|
||||
for (unsigned int insn = 0; insn < fp->Base.NumInstructions; insn++) {
|
||||
const struct prog_instruction *fpi = &fp->Base.Instructions[insn];
|
||||
base_ir = fpi;
|
||||
|
||||
//_mesa_print_instruction(fpi);
|
||||
|
||||
fs_reg dst;
|
||||
fs_reg src[3];
|
||||
|
||||
/* We always emit into a temporary destination register to avoid
|
||||
* aliasing issues.
|
||||
*/
|
||||
dst = fs_reg(this, glsl_type::vec4_type);
|
||||
|
||||
for (int i = 0; i < 3; i++)
|
||||
src[i] = get_fp_src_reg(&fpi->SrcReg[i]);
|
||||
|
||||
switch (fpi->Opcode) {
|
||||
case OPCODE_ABS:
|
||||
src[0].abs = true;
|
||||
src[0].negate = false;
|
||||
emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]);
|
||||
break;
|
||||
|
||||
case OPCODE_ADD:
|
||||
emit_fp_alu2(BRW_OPCODE_ADD, fpi, dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case OPCODE_CMP:
|
||||
for (int i = 0; i < 4; i++) {
|
||||
if (fpi->DstReg.WriteMask & (1 << i)) {
|
||||
fs_inst *inst;
|
||||
|
||||
inst = emit(BRW_OPCODE_CMP, null,
|
||||
regoffset(src[0], i), fs_reg(0.0f));
|
||||
inst->conditional_mod = BRW_CONDITIONAL_L;
|
||||
|
||||
inst = emit(BRW_OPCODE_SEL, regoffset(dst, i),
|
||||
regoffset(src[1], i), regoffset(src[2], i));
|
||||
inst->predicated = true;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case OPCODE_COS:
|
||||
emit_fp_scalar_math(SHADER_OPCODE_COS, fpi, dst, src[0]);
|
||||
break;
|
||||
|
||||
case OPCODE_DP2:
|
||||
case OPCODE_DP3:
|
||||
case OPCODE_DP4:
|
||||
case OPCODE_DPH: {
|
||||
fs_reg mul = fs_reg(this, glsl_type::float_type);
|
||||
fs_reg acc = fs_reg(this, glsl_type::float_type);
|
||||
int count;
|
||||
|
||||
switch (fpi->Opcode) {
|
||||
case OPCODE_DP2: count = 2; break;
|
||||
case OPCODE_DP3: count = 3; break;
|
||||
case OPCODE_DP4: count = 4; break;
|
||||
case OPCODE_DPH: count = 3; break;
|
||||
default: assert(!"not reached"); count = 0; break;
|
||||
}
|
||||
|
||||
emit(BRW_OPCODE_MUL, acc,
|
||||
regoffset(src[0], 0), regoffset(src[1], 0));
|
||||
for (int i = 1; i < count; i++) {
|
||||
emit(BRW_OPCODE_MUL, mul,
|
||||
regoffset(src[0], i), regoffset(src[1], i));
|
||||
emit(BRW_OPCODE_ADD, acc, acc, mul);
|
||||
}
|
||||
|
||||
if (fpi->Opcode == OPCODE_DPH)
|
||||
emit(BRW_OPCODE_ADD, acc, acc, regoffset(src[1], 3));
|
||||
|
||||
emit_fp_scalar_write(fpi, dst, acc);
|
||||
break;
|
||||
}
|
||||
|
||||
case OPCODE_DST:
|
||||
if (fpi->DstReg.WriteMask & WRITEMASK_X)
|
||||
emit(BRW_OPCODE_MOV, dst, fs_reg(1.0f));
|
||||
if (fpi->DstReg.WriteMask & WRITEMASK_Y) {
|
||||
emit(BRW_OPCODE_MUL, regoffset(dst, 1),
|
||||
regoffset(src[0], 1), regoffset(src[1], 1));
|
||||
}
|
||||
if (fpi->DstReg.WriteMask & WRITEMASK_Z)
|
||||
emit(BRW_OPCODE_MOV, regoffset(dst, 2), regoffset(src[0], 2));
|
||||
if (fpi->DstReg.WriteMask & WRITEMASK_W)
|
||||
emit(BRW_OPCODE_MOV, regoffset(dst, 3), regoffset(src[1], 3));
|
||||
break;
|
||||
|
||||
case OPCODE_EX2:
|
||||
emit_fp_scalar_math(SHADER_OPCODE_EXP2, fpi, dst, src[0]);
|
||||
break;
|
||||
|
||||
case OPCODE_FLR:
|
||||
emit_fp_alu1(BRW_OPCODE_RNDD, fpi, dst, src[0]);
|
||||
break;
|
||||
|
||||
case OPCODE_FRC:
|
||||
emit_fp_alu1(BRW_OPCODE_FRC, fpi, dst, src[0]);
|
||||
break;
|
||||
|
||||
case OPCODE_KIL: {
|
||||
for (int i = 0; i < 4; i++) {
|
||||
/* In most cases the argument to a KIL will be something like
|
||||
* TEMP[0].wwww, so there's no point in checking whether .w is < 0
|
||||
* 4 times in a row.
|
||||
*/
|
||||
if (i > 0 &&
|
||||
GET_SWZ(fpi->SrcReg[0].Swizzle, i) ==
|
||||
GET_SWZ(fpi->SrcReg[0].Swizzle, i - 1) &&
|
||||
((fpi->SrcReg[0].Negate >> i) & 1) ==
|
||||
((fpi->SrcReg[0].Negate >> (i - 1)) & 1)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
fs_inst *inst = emit(BRW_OPCODE_CMP, null,
|
||||
regoffset(src[0], i), 0.0f);
|
||||
inst->conditional_mod = BRW_CONDITIONAL_L;
|
||||
|
||||
inst = emit(BRW_OPCODE_IF);
|
||||
inst->predicated = true;
|
||||
emit(FS_OPCODE_DISCARD);
|
||||
emit(BRW_OPCODE_ENDIF);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case OPCODE_LG2:
|
||||
emit_fp_scalar_math(SHADER_OPCODE_LOG2, fpi, dst, src[0]);
|
||||
break;
|
||||
|
||||
case OPCODE_LIT:
|
||||
/* From the ARB_fragment_program spec:
|
||||
*
|
||||
* tmp = VectorLoad(op0);
|
||||
* if (tmp.x < 0) tmp.x = 0;
|
||||
* if (tmp.y < 0) tmp.y = 0;
|
||||
* if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
|
||||
* else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
|
||||
* result.x = 1.0;
|
||||
* result.y = tmp.x;
|
||||
* result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
|
||||
* result.w = 1.0;
|
||||
*
|
||||
* Note that we don't do the clamping to +/- 128. We didn't in
|
||||
* brw_wm_emit.c either.
|
||||
*/
|
||||
if (fpi->DstReg.WriteMask & WRITEMASK_X)
|
||||
emit(BRW_OPCODE_MOV, regoffset(dst, 0), fs_reg(1.0f));
|
||||
|
||||
if (fpi->DstReg.WriteMask & WRITEMASK_YZ) {
|
||||
fs_inst *inst;
|
||||
inst = emit(BRW_OPCODE_CMP, null,
|
||||
regoffset(src[0], 0), fs_reg(0.0f));
|
||||
inst->conditional_mod = BRW_CONDITIONAL_LE;
|
||||
|
||||
if (fpi->DstReg.WriteMask & WRITEMASK_Y) {
|
||||
emit(BRW_OPCODE_MOV, regoffset(dst, 1), regoffset(src[0], 0));
|
||||
inst = emit(BRW_OPCODE_MOV, regoffset(dst, 1), fs_reg(0.0f));
|
||||
inst->predicated = true;
|
||||
}
|
||||
|
||||
if (fpi->DstReg.WriteMask & WRITEMASK_Z) {
|
||||
emit_math(SHADER_OPCODE_POW, regoffset(dst, 2),
|
||||
regoffset(src[0], 1), regoffset(src[0], 3));
|
||||
|
||||
inst = emit(BRW_OPCODE_MOV, regoffset(dst, 2), fs_reg(0.0f));
|
||||
inst->predicated = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (fpi->DstReg.WriteMask & WRITEMASK_W)
|
||||
emit(BRW_OPCODE_MOV, regoffset(dst, 3), fs_reg(1.0f));
|
||||
|
||||
break;
|
||||
|
||||
case OPCODE_LRP:
|
||||
for (int i = 0; i < 4; i++) {
|
||||
if (fpi->DstReg.WriteMask & (1 << i)) {
|
||||
fs_reg neg_src0 = regoffset(src[0], i);
|
||||
neg_src0.negate = !neg_src0.negate;
|
||||
fs_reg temp = fs_reg(this, glsl_type::float_type);
|
||||
fs_reg temp2 = fs_reg(this, glsl_type::float_type);
|
||||
emit(BRW_OPCODE_ADD, temp, neg_src0, fs_reg(1.0f));
|
||||
emit(BRW_OPCODE_MUL, temp, temp, regoffset(src[2], i));
|
||||
emit(BRW_OPCODE_MUL, temp2,
|
||||
regoffset(src[0], i), regoffset(src[1], i));
|
||||
emit(BRW_OPCODE_ADD, regoffset(dst, i), temp, temp2);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case OPCODE_MAD:
|
||||
for (int i = 0; i < 4; i++) {
|
||||
if (fpi->DstReg.WriteMask & (1 << i)) {
|
||||
fs_reg temp = fs_reg(this, glsl_type::float_type);
|
||||
emit(BRW_OPCODE_MUL, temp,
|
||||
regoffset(src[0], i), regoffset(src[1], i));
|
||||
emit(BRW_OPCODE_ADD, regoffset(dst, i),
|
||||
temp, regoffset(src[2], i));
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case OPCODE_MAX:
|
||||
emit_fp_minmax(fpi, dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case OPCODE_MOV:
|
||||
emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]);
|
||||
break;
|
||||
|
||||
case OPCODE_MIN:
|
||||
emit_fp_minmax(fpi, dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case OPCODE_MUL:
|
||||
emit_fp_alu2(BRW_OPCODE_MUL, fpi, dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case OPCODE_POW: {
|
||||
fs_reg temp = fs_reg(this, glsl_type::float_type);
|
||||
emit_math(SHADER_OPCODE_POW, temp, src[0], src[1]);
|
||||
emit_fp_scalar_write(fpi, dst, temp);
|
||||
break;
|
||||
}
|
||||
|
||||
case OPCODE_RCP:
|
||||
emit_fp_scalar_math(SHADER_OPCODE_RCP, fpi, dst, src[0]);
|
||||
break;
|
||||
|
||||
case OPCODE_RSQ:
|
||||
emit_fp_scalar_math(SHADER_OPCODE_RSQ, fpi, dst, src[0]);
|
||||
break;
|
||||
|
||||
case OPCODE_SCS:
|
||||
if (fpi->DstReg.WriteMask & WRITEMASK_X) {
|
||||
emit_math(SHADER_OPCODE_COS, regoffset(dst, 0),
|
||||
regoffset(src[0], 0));
|
||||
}
|
||||
|
||||
if (fpi->DstReg.WriteMask & WRITEMASK_Y) {
|
||||
emit_math(SHADER_OPCODE_SIN, regoffset(dst, 1),
|
||||
regoffset(src[0], 1));
|
||||
}
|
||||
break;
|
||||
|
||||
case OPCODE_SGE:
|
||||
emit_fp_sop(BRW_CONDITIONAL_GE, fpi, dst, src[0], src[1], one);
|
||||
break;
|
||||
|
||||
case OPCODE_SIN:
|
||||
emit_fp_scalar_math(SHADER_OPCODE_SIN, fpi, dst, src[0]);
|
||||
break;
|
||||
|
||||
case OPCODE_SLT:
|
||||
emit_fp_sop(BRW_CONDITIONAL_L, fpi, dst, src[0], src[1], one);
|
||||
break;
|
||||
|
||||
case OPCODE_SUB: {
|
||||
fs_reg neg_src1 = src[1];
|
||||
neg_src1.negate = !src[1].negate;
|
||||
|
||||
emit_fp_alu2(BRW_OPCODE_ADD, fpi, dst, src[0], neg_src1);
|
||||
break;
|
||||
}
|
||||
|
||||
case OPCODE_TEX:
|
||||
case OPCODE_TXB:
|
||||
case OPCODE_TXP: {
|
||||
/* We piggy-back on the GLSL IR support for texture setup. To do so,
|
||||
* we have to cook up an ir_texture that has the coordinate field
|
||||
* with appropriate type, and shadow_comparitor set or not. All the
|
||||
* other properties of ir_texture are passed in as arguments to the
|
||||
* emit_texture_gen* function.
|
||||
*/
|
||||
ir_texture *ir = NULL;
|
||||
|
||||
fs_reg lod;
|
||||
fs_reg dpdy;
|
||||
fs_reg coordinate = src[0];
|
||||
fs_reg shadow_c;
|
||||
|
||||
switch (fpi->Opcode) {
|
||||
case OPCODE_TEX:
|
||||
ir = new(mem_ctx) ir_texture(ir_tex);
|
||||
break;
|
||||
case OPCODE_TXP: {
|
||||
ir = new(mem_ctx) ir_texture(ir_tex);
|
||||
|
||||
coordinate = fs_reg(this, glsl_type::vec3_type);
|
||||
fs_reg invproj = fs_reg(this, glsl_type::float_type);
|
||||
emit_math(SHADER_OPCODE_RCP, invproj, regoffset(src[0], 3));
|
||||
for (int i = 0; i < 3; i++) {
|
||||
emit(BRW_OPCODE_MUL, regoffset(coordinate, i),
|
||||
regoffset(src[0], i), invproj);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case OPCODE_TXB:
|
||||
ir = new(mem_ctx) ir_texture(ir_txb);
|
||||
lod = regoffset(src[0], 3);
|
||||
break;
|
||||
default:
|
||||
assert(!"not reached");
|
||||
break;
|
||||
}
|
||||
|
||||
const glsl_type *coordinate_type;
|
||||
switch (fpi->TexSrcTarget) {
|
||||
case TEXTURE_1D_INDEX:
|
||||
coordinate_type = glsl_type::float_type;
|
||||
break;
|
||||
|
||||
case TEXTURE_2D_INDEX:
|
||||
case TEXTURE_1D_ARRAY_INDEX:
|
||||
case TEXTURE_RECT_INDEX:
|
||||
case TEXTURE_EXTERNAL_INDEX:
|
||||
coordinate_type = glsl_type::vec2_type;
|
||||
break;
|
||||
|
||||
case TEXTURE_3D_INDEX:
|
||||
case TEXTURE_2D_ARRAY_INDEX:
|
||||
coordinate_type = glsl_type::vec3_type;
|
||||
break;
|
||||
|
||||
case TEXTURE_CUBE_INDEX: {
|
||||
coordinate_type = glsl_type::vec3_type;
|
||||
|
||||
fs_reg temp = fs_reg(this, glsl_type::float_type);
|
||||
fs_reg cubecoord = fs_reg(this, glsl_type::vec3_type);
|
||||
fs_reg abscoord = coordinate;
|
||||
abscoord.negate = false;
|
||||
abscoord.abs = true;
|
||||
emit_minmax(BRW_CONDITIONAL_GE, temp,
|
||||
regoffset(abscoord, 0), regoffset(abscoord, 1));
|
||||
emit_minmax(BRW_CONDITIONAL_GE, temp,
|
||||
temp, regoffset(abscoord, 2));
|
||||
emit_math(SHADER_OPCODE_RCP, temp, temp);
|
||||
for (int i = 0; i < 3; i++) {
|
||||
emit(BRW_OPCODE_MUL, regoffset(cubecoord, i),
|
||||
regoffset(coordinate, i), temp);
|
||||
}
|
||||
|
||||
coordinate = cubecoord;
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
assert(!"not reached");
|
||||
coordinate_type = glsl_type::vec2_type;
|
||||
break;
|
||||
}
|
||||
|
||||
ir_constant_data junk_data;
|
||||
ir->coordinate = new(mem_ctx) ir_constant(coordinate_type, &junk_data);
|
||||
|
||||
coordinate = rescale_texcoord(ir, coordinate,
|
||||
fpi->TexSrcTarget == TEXTURE_RECT_INDEX,
|
||||
fpi->TexSrcUnit, fpi->TexSrcUnit);
|
||||
|
||||
if (fpi->TexShadow) {
|
||||
shadow_c = regoffset(coordinate, 2);
|
||||
ir->shadow_comparitor = new(mem_ctx) ir_constant(0.0f);
|
||||
}
|
||||
|
||||
fs_inst *inst;
|
||||
if (intel->gen >= 7) {
|
||||
inst = emit_texture_gen7(ir, dst, coordinate, shadow_c, lod, dpdy);
|
||||
} else if (intel->gen >= 5) {
|
||||
inst = emit_texture_gen5(ir, dst, coordinate, shadow_c, lod, dpdy);
|
||||
} else {
|
||||
inst = emit_texture_gen4(ir, dst, coordinate, shadow_c, lod, dpdy);
|
||||
}
|
||||
|
||||
inst->sampler = fpi->TexSrcUnit;
|
||||
inst->shadow_compare = fpi->TexShadow;
|
||||
|
||||
/* Reuse the GLSL swizzle_result() handler. */
|
||||
swizzle_result(ir, dst, fpi->TexSrcUnit);
|
||||
dst = this->result;
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case OPCODE_SWZ:
|
||||
/* Note that SWZ's extended swizzles are handled in the general
|
||||
* get_src_reg() code.
|
||||
*/
|
||||
emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]);
|
||||
break;
|
||||
|
||||
case OPCODE_XPD:
|
||||
for (int i = 0; i < 3; i++) {
|
||||
if (fpi->DstReg.WriteMask & (1 << i)) {
|
||||
int i1 = (i + 1) % 3;
|
||||
int i2 = (i + 2) % 3;
|
||||
|
||||
fs_reg temp = fs_reg(this, glsl_type::float_type);
|
||||
fs_reg neg_src1_1 = regoffset(src[1], i1);
|
||||
neg_src1_1.negate = !neg_src1_1.negate;
|
||||
emit(BRW_OPCODE_MUL, temp,
|
||||
regoffset(src[0], i2), neg_src1_1);
|
||||
emit(BRW_OPCODE_MUL, regoffset(dst, i),
|
||||
regoffset(src[0], i1), regoffset(src[1], i2));
|
||||
emit(BRW_OPCODE_ADD, regoffset(dst, i),
|
||||
regoffset(dst, i), temp);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case OPCODE_END:
|
||||
break;
|
||||
|
||||
default:
|
||||
_mesa_problem(ctx, "Unsupported opcode %s in fragment program\n",
|
||||
_mesa_opcode_string(fpi->Opcode));
|
||||
}
|
||||
|
||||
/* To handle saturates, we emit a MOV with a saturate bit, which
|
||||
* optimization should fold into the preceding instructions when safe.
|
||||
*/
|
||||
if (fpi->Opcode != OPCODE_END) {
|
||||
fs_reg real_dst = get_fp_dst_reg(&fpi->DstReg);
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
if (fpi->DstReg.WriteMask & (1 << i)) {
|
||||
fs_inst *inst = emit(BRW_OPCODE_MOV,
|
||||
regoffset(real_dst, i),
|
||||
regoffset(dst, i));
|
||||
inst->saturate = fpi->SaturateMode;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Epilogue:
|
||||
*
|
||||
* Fragment depth has this strange convention of being the .z component of
|
||||
* a vec4. emit_fb_write() wants to see a float value, instead.
|
||||
*/
|
||||
this->current_annotation = "result.depth write";
|
||||
if (frag_depth.file != BAD_FILE) {
|
||||
fs_reg temp = fs_reg(this, glsl_type::float_type);
|
||||
emit(BRW_OPCODE_MOV, temp, regoffset(frag_depth, 2));
|
||||
frag_depth = temp;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
fs_visitor::setup_fp_regs()
|
||||
{
|
||||
/* PROGRAM_TEMPORARY */
|
||||
int num_temp = fp->Base.NumTemporaries;
|
||||
fp_temp_regs = rzalloc_array(mem_ctx, fs_reg, num_temp);
|
||||
for (int i = 0; i < num_temp; i++)
|
||||
fp_temp_regs[i] = fs_reg(this, glsl_type::vec4_type);
|
||||
|
||||
/* PROGRAM_STATE_VAR, PROGRAM_NAMED_PARAM, etc. */
|
||||
if (c->dispatch_width == 8) {
|
||||
for (unsigned p = 0;
|
||||
p < c->fp->program.Base.Parameters->NumParameters; p++) {
|
||||
for (unsigned int i = 0; i < 4; i++) {
|
||||
this->param_index[c->prog_data.nr_params] = p;
|
||||
this->param_offset[c->prog_data.nr_params] = i;
|
||||
c->prog_data.nr_params++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fp_input_regs = rzalloc_array(mem_ctx, fs_reg, FRAG_ATTRIB_MAX);
|
||||
for (int i = 0; i < FRAG_ATTRIB_MAX; i++) {
|
||||
if (fp->Base.InputsRead & BITFIELD64_BIT(i)) {
|
||||
/* Make up a dummy instruction to reuse code for emitting
|
||||
* interpolation.
|
||||
*/
|
||||
ir_variable *ir = new(mem_ctx) ir_variable(glsl_type::vec4_type,
|
||||
"fp_input",
|
||||
ir_var_in);
|
||||
ir->location = i;
|
||||
|
||||
this->current_annotation = ralloc_asprintf(ctx, "interpolate input %d",
|
||||
i);
|
||||
|
||||
switch (i) {
|
||||
case FRAG_ATTRIB_WPOS:
|
||||
ir->pixel_center_integer = fp->PixelCenterInteger;
|
||||
ir->origin_upper_left = fp->OriginUpperLeft;
|
||||
fp_input_regs[i] = *emit_fragcoord_interpolation(ir);
|
||||
break;
|
||||
case FRAG_ATTRIB_FACE:
|
||||
fp_input_regs[i] = *emit_frontfacing_interpolation(ir);
|
||||
break;
|
||||
default:
|
||||
fp_input_regs[i] = *emit_general_interpolation(ir);
|
||||
|
||||
if (i == FRAG_ATTRIB_FOGC) {
|
||||
emit(BRW_OPCODE_MOV,
|
||||
regoffset(fp_input_regs[i], 1), fs_reg(0.0f));
|
||||
emit(BRW_OPCODE_MOV,
|
||||
regoffset(fp_input_regs[i], 2), fs_reg(0.0f));
|
||||
emit(BRW_OPCODE_MOV,
|
||||
regoffset(fp_input_regs[i], 3), fs_reg(1.0f));
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
this->current_annotation = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Maps a fragment program destination register to the FS IR register that
 * backs it.
 *
 * - PROGRAM_TEMPORARY: the matching entry of fp_temp_regs.
 * - PROGRAM_OUTPUT: frag_depth for FRAG_RESULT_DEPTH, outputs[0] for
 *   FRAG_RESULT_COLOR, otherwise outputs[Index - FRAG_RESULT_DATA0].  Output
 *   registers are created as vec4s the first time they are requested.
 * - PROGRAM_UNDEFINED: a default-constructed fs_reg.
 * - Anything else: reports a problem and returns a fresh vec4 register.
 */
fs_reg
fs_visitor::get_fp_dst_reg(const prog_dst_register *dst)
{
   switch (dst->File) {
   case PROGRAM_UNDEFINED:
      return fs_reg();

   case PROGRAM_TEMPORARY:
      return fp_temp_regs[dst->Index];

   case PROGRAM_OUTPUT: {
      if (dst->Index == FRAG_RESULT_DEPTH) {
         if (frag_depth.file == BAD_FILE)
            frag_depth = fs_reg(this, glsl_type::vec4_type);
         return frag_depth;
      }

      if (dst->Index == FRAG_RESULT_COLOR) {
         if (outputs[0].file != BAD_FILE)
            return outputs[0];

         outputs[0] = fs_reg(this, glsl_type::vec4_type);
         output_components[0] = 4;

         /* Point every other color region at the same register so that
          * emit_fb_writes() smears fragment.color across all the color
          * attachments.
          */
         for (int region = 1; region < c->key.nr_color_regions; region++) {
            outputs[region] = outputs[0];
            output_components[region] = output_components[0];
         }
         return outputs[0];
      }

      const int output_index = dst->Index - FRAG_RESULT_DATA0;
      if (outputs[output_index].file == BAD_FILE)
         outputs[output_index] = fs_reg(this, glsl_type::vec4_type);
      output_components[output_index] = 4;
      return outputs[output_index];
   }

   default:
      _mesa_problem(ctx, "bad dst register file: %s\n",
                    _mesa_register_file_name((gl_register_file)dst->File));
      return fs_reg(this, glsl_type::vec4_type);
   }
}
|
||||
|
||||
/**
 * Maps a fragment program source operand to an FS IR register, applying the
 * operand's swizzle and per-component negate.
 *
 * - PROGRAM_UNDEFINED: returns a default-constructed fs_reg immediately.
 * - PROGRAM_TEMPORARY / PROGRAM_INPUT: the matching entry of fp_temp_regs /
 *   fp_input_regs.
 * - PROGRAM_STATE_VAR, PROGRAM_UNIFORM, PROGRAM_CONSTANT,
 *   PROGRAM_NAMED_PARAM: resolved through the program's parameter list.
 *   Constant-typed parameters are loaded as immediates into a fresh vec4;
 *   the others become a UNIFORM register at index src->Index * 4.
 * - Anything else: reports a problem and returns a fresh vec4 register.
 *
 * If the operand has a non-identity swizzle or any negate bit set, the result
 * is a new vec4 filled by one MOV per component.  The absolute-value modifier
 * is not supported and is asserted to be clear.
 */
fs_reg
fs_visitor::get_fp_src_reg(const prog_src_register *src)
{
   struct gl_program_parameter_list *plist = c->fp->program.Base.Parameters;

   fs_reg result;

   assert(!src->Abs);

   switch (src->File) {
   case PROGRAM_UNDEFINED:
      return fs_reg();
   case PROGRAM_TEMPORARY:
      result = fp_temp_regs[src->Index];
      break;

   case PROGRAM_INPUT:
      result = fp_input_regs[src->Index];
      break;

   case PROGRAM_STATE_VAR:
   case PROGRAM_UNIFORM:
   case PROGRAM_CONSTANT:
   case PROGRAM_NAMED_PARAM:
      /* Dispatch on the type recorded in the Parameters list rather than on
       * the operand's file: this lets constant builtin uniforms be uploaded
       * as actual constants.
       */
      switch (plist->Parameters[src->Index].Type) {
      case PROGRAM_NAMED_PARAM:
      case PROGRAM_CONSTANT: {
         result = fs_reg(this, glsl_type::vec4_type);

         for (int i = 0; i < 4; i++) {
            emit(BRW_OPCODE_MOV, regoffset(result, i),
                 fs_reg(plist->ParameterValues[src->Index][i].f));
         }
         break;
      }

      case PROGRAM_STATE_VAR:
      case PROGRAM_UNIFORM:
         result = fs_reg(UNIFORM, src->Index * 4);
         break;

      default:
         /* The unexpected value here is the parameter's type, not the
          * operand's file (which already matched one of the cases above),
          * so that is what gets reported.
          */
         _mesa_problem(ctx, "bad uniform src register file: %s\n",
                       _mesa_register_file_name((gl_register_file)
                          plist->Parameters[src->Index].Type));
         return fs_reg(this, glsl_type::vec4_type);
      }
      break;

   default:
      _mesa_problem(ctx, "bad src register file: %s\n",
                    _mesa_register_file_name((gl_register_file)src->File));
      return fs_reg(this, glsl_type::vec4_type);
   }

   if (src->Swizzle != SWIZZLE_NOOP || src->Negate) {
      fs_reg unswizzled = result;
      result = fs_reg(this, glsl_type::vec4_type);
      for (int i = 0; i < 4; i++) {
         bool negate = src->Negate & (1 << i);
         /* The ZERO, ONE, and Negate options are only used for OPCODE_SWZ,
          * but it costs us nothing to support it.
          */
         int src_swiz = GET_SWZ(src->Swizzle, i);
         if (src_swiz == SWIZZLE_ZERO) {
            emit(BRW_OPCODE_MOV, regoffset(result, i), fs_reg(0.0f));
         } else if (src_swiz == SWIZZLE_ONE) {
            emit(BRW_OPCODE_MOV, regoffset(result, i),
                 negate ? fs_reg(-1.0f) : fs_reg(1.0f));
         } else {
            /* Named "component" so it does not shadow the "src" parameter. */
            fs_reg component = regoffset(unswizzled, src_swiz);
            if (negate)
               component.negate = !component.negate;
            emit(BRW_OPCODE_MOV, regoffset(result, i), component);
         }
      }
   }

   return result;
}
|
|
@ -2228,8 +2228,7 @@ fs_visitor::fs_visitor(struct brw_wm_compile *c, struct gl_shader_program *prog,
|
|||
this->c = c;
|
||||
this->p = &c->func;
|
||||
this->brw = p->brw;
|
||||
this->fp = (struct gl_fragment_program *)
|
||||
prog->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program;
|
||||
this->fp = &c->fp->program;
|
||||
this->prog = prog;
|
||||
this->intel = &brw->intel;
|
||||
this->ctx = &intel->ctx;
|
||||
|
|
|
@ -85,46 +85,6 @@ GLuint brw_wm_is_scalar_result( GLuint opcode )
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Do GPU code generation for non-GLSL shader. non-GLSL shaders have
|
||||
* no flow control instructions so we can more readily do SSA-style
|
||||
* optimizations.
|
||||
*/
|
||||
static void
|
||||
brw_wm_non_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c)
|
||||
{
|
||||
/* Augment fragment program. Add instructions for pre- and
|
||||
* post-fragment-program tasks such as interpolation and fogging.
|
||||
*/
|
||||
brw_wm_pass_fp(c);
|
||||
|
||||
/* Translate to intermediate representation. Build register usage
|
||||
* chains.
|
||||
*/
|
||||
brw_wm_pass0(c);
|
||||
|
||||
/* Dead code removal.
|
||||
*/
|
||||
brw_wm_pass1(c);
|
||||
|
||||
/* Register allocation.
|
||||
* Divide by two because we operate on 16 pixels at a time and require
|
||||
* two GRF entries for each logical shader register.
|
||||
*/
|
||||
c->grf_limit = BRW_WM_MAX_GRF / 2;
|
||||
|
||||
brw_wm_pass2(c);
|
||||
|
||||
/* how many general-purpose registers are used */
|
||||
c->prog_data.reg_blocks = brw_register_blocks(c->max_wm_grf);
|
||||
|
||||
/* Emit GEN4 code.
|
||||
*/
|
||||
brw_wm_emit(c);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Return a bitfield where bit n is set if barycentric interpolation mode n
|
||||
* (see enum brw_wm_barycentric_interp_mode) is needed by the fragment shader.
|
||||
|
@ -356,23 +316,7 @@ bool do_wm_prog(struct brw_context *brw,
|
|||
brw_compute_barycentric_interp_modes(brw, c->key.flat_shade,
|
||||
&fp->program);
|
||||
|
||||
if (prog && prog->_LinkedShaders[MESA_SHADER_FRAGMENT]) {
|
||||
if (!brw_wm_fs_emit(brw, c, prog))
|
||||
return false;
|
||||
} else {
|
||||
if (!c->instruction) {
|
||||
c->instruction = rzalloc_array(c, struct brw_wm_instruction, BRW_WM_MAX_INSN);
|
||||
c->prog_instructions = rzalloc_array(c, struct prog_instruction, BRW_WM_MAX_INSN);
|
||||
c->vreg = rzalloc_array(c, struct brw_wm_value, BRW_WM_MAX_VREG);
|
||||
c->refs = rzalloc_array(c, struct brw_wm_ref, BRW_WM_MAX_REF);
|
||||
}
|
||||
|
||||
/* Fallback for fixed function and ARB_fp shaders. */
|
||||
c->dispatch_width = 16;
|
||||
brw_wm_payload_setup(brw, c);
|
||||
brw_wm_non_glsl_emit(brw, c);
|
||||
c->prog_data.dispatch_width = 16;
|
||||
}
|
||||
brw_wm_fs_emit(brw, c, prog);
|
||||
|
||||
/* Scratch space is used for register spilling */
|
||||
if (c->last_scratch) {
|
||||
|
|
|
@ -163,23 +163,8 @@ brw_upload_wm_unit(struct brw_context *brw)
|
|||
/* _NEW_COLOR */
|
||||
wm->wm5.program_uses_killpixel = fp->UsesKill || ctx->Color.AlphaEnabled;
|
||||
|
||||
|
||||
/* BRW_NEW_FRAGMENT_PROGRAM
|
||||
*
|
||||
* If using the fragment shader backend, the program is always
|
||||
* 8-wide. If not, it's always 16.
|
||||
*/
|
||||
if (ctx->Shader._CurrentFragmentProgram) {
|
||||
struct brw_shader *shader = (struct brw_shader *)
|
||||
ctx->Shader._CurrentFragmentProgram->_LinkedShaders[MESA_SHADER_FRAGMENT];
|
||||
|
||||
if (shader != NULL && shader->ir != NULL) {
|
||||
wm->wm5.enable_8_pix = 1;
|
||||
if (brw->wm.prog_data->prog_offset_16)
|
||||
wm->wm5.enable_16_pix = 1;
|
||||
}
|
||||
}
|
||||
if (!wm->wm5.enable_8_pix)
|
||||
wm->wm5.enable_8_pix = 1;
|
||||
if (brw->wm.prog_data->prog_offset_16)
|
||||
wm->wm5.enable_16_pix = 1;
|
||||
|
||||
wm->wm5.max_threads = brw->max_wm_threads - 1;
|
||||
|
|
|
@ -151,13 +151,9 @@ upload_wm_state(struct brw_context *brw)
|
|||
dw5 |= (brw->max_wm_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT;
|
||||
|
||||
/* CACHE_NEW_WM_PROG */
|
||||
if (brw->wm.prog_data->dispatch_width == 8) {
|
||||
dw5 |= GEN6_WM_8_DISPATCH_ENABLE;
|
||||
if (brw->wm.prog_data->prog_offset_16)
|
||||
dw5 |= GEN6_WM_16_DISPATCH_ENABLE;
|
||||
} else {
|
||||
dw5 |= GEN6_WM_8_DISPATCH_ENABLE;
|
||||
if (brw->wm.prog_data->prog_offset_16)
|
||||
dw5 |= GEN6_WM_16_DISPATCH_ENABLE;
|
||||
}
|
||||
|
||||
/* CACHE_NEW_WM_PROG | _NEW_COLOR */
|
||||
if (brw->wm.prog_data->dual_src_blend &&
|
||||
|
|
|
@ -196,13 +196,9 @@ upload_ps_state(struct brw_context *brw)
|
|||
if (brw->fragment_program->Base.InputsRead != 0)
|
||||
dw4 |= GEN7_PS_ATTRIBUTE_ENABLE;
|
||||
|
||||
if (brw->wm.prog_data->dispatch_width == 8) {
|
||||
dw4 |= GEN7_PS_8_DISPATCH_ENABLE;
|
||||
if (brw->wm.prog_data->prog_offset_16)
|
||||
dw4 |= GEN7_PS_16_DISPATCH_ENABLE;
|
||||
} else {
|
||||
dw4 |= GEN7_PS_8_DISPATCH_ENABLE;
|
||||
if (brw->wm.prog_data->prog_offset_16)
|
||||
dw4 |= GEN7_PS_16_DISPATCH_ENABLE;
|
||||
}
|
||||
|
||||
dw5 |= (brw->wm.prog_data->first_curbe_grf <<
|
||||
GEN7_PS_DISPATCH_START_GRF_SHIFT_0);
|
||||
|
|
Loading…
Reference in New Issue