From 97615b2d8c7c3cea6fd3a43bcb1739a96e2046c4 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 27 Aug 2012 14:35:01 -0700 Subject: [PATCH] i965: Replace brw_wm_* with dumping code into the fs_visitor. This makes a giant pile of code newly dead. It also fixes TXB on newer chipsets, which has been totally broken (I now have a piglit test for that). It passes the same set of Ian's ARB_fragment_program tests. It also improves high-settings ETQW performance by 3.2 +/- 1.9% (n=3), thanks to better optimization and having 8-wide along with 16-wide shaders. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=24355 Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/Makefile.sources | 1 + src/mesa/drivers/dri/i965/brw_fs.cpp | 36 +- src/mesa/drivers/dri/i965/brw_fs.h | 30 +- src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 22 +- src/mesa/drivers/dri/i965/brw_fs_fp.cpp | 784 +++++++++++++++++++ src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 3 +- src/mesa/drivers/dri/i965/brw_wm.c | 58 +- src/mesa/drivers/dri/i965/brw_wm_state.c | 19 +- src/mesa/drivers/dri/i965/gen6_wm_state.c | 8 +- src/mesa/drivers/dri/i965/gen7_wm_state.c | 8 +- 10 files changed, 860 insertions(+), 109 deletions(-) create mode 100644 src/mesa/drivers/dri/i965/brw_fs_fp.cpp diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index 3715b0f300f..edc2376815e 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -119,6 +119,7 @@ i965_CXX_FILES = \ brw_fs_cse.cpp \ brw_fs_copy_propagation.cpp \ brw_fs_emit.cpp \ + brw_fs_fp.cpp \ brw_fs_live_variables.cpp \ brw_fs_visitor.cpp \ brw_fs_channel_expressions.cpp \ diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index fea598025ef..27014133d4f 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1999,11 +1999,15 @@ fs_visitor::run() /* Generate FS IR for main(). (the visitor only descends into * functions called "main"). */ - foreach_list(node, &*shader->ir) { - ir_instruction *ir = (ir_instruction *)node; - base_ir = ir; - this->result = reg_undef; - ir->accept(this); + if (shader) { + foreach_list(node, &*shader->ir) { + ir_instruction *ir = (ir_instruction *)node; + base_ir = ir; + this->result = reg_undef; + ir->accept(this); + } + } else { + emit_fragment_program_code(); } if (failed) return false; @@ -2084,24 +2088,26 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c, bool start_busy = false; float start_time = 0; - if (!prog) - return false; - if (unlikely(INTEL_DEBUG & DEBUG_PERF)) { start_busy = (intel->batch.last_bo && drm_intel_bo_busy(intel->batch.last_bo)); start_time = get_time(); } - struct brw_shader *shader = - (brw_shader *) prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; - if (!shader) - return false; + struct brw_shader *shader = NULL; + if (prog) + shader = (brw_shader *) prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; if (unlikely(INTEL_DEBUG & DEBUG_WM)) { - printf("GLSL IR for native fragment shader %d:\n", prog->Name); - _mesa_print_ir(shader->ir, NULL); - printf("\n\n"); + if (shader) { + printf("GLSL IR for native fragment shader %d:\n", prog->Name); + _mesa_print_ir(shader->ir, NULL); + printf("\n\n"); + } else { + printf("ARB_fragment_program %d ir for native fragment shader\n", + c->fp->program.Base.Id); + _mesa_print_program(&c->fp->program.Base); + } } /* Now the main event: Visit the shader IR and generate our FS IR for it. diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 2209e416b6d..4db9e90ed5e 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -177,7 +177,7 @@ public: /** @{ * Annotation for the generated IR. One of the two can be set. */ - ir_instruction *ir; + const void *ir; const char *annotation; /** @} */ }; @@ -324,6 +324,29 @@ public: void emit_if_gen6(ir_if *ir); void emit_unspill(fs_inst *inst, fs_reg reg, uint32_t spill_offset); + void emit_fragment_program_code(); + void setup_fp_regs(); + fs_reg get_fp_src_reg(const prog_src_register *src); + fs_reg get_fp_dst_reg(const prog_dst_register *dst); + void emit_fp_alu1(enum opcode opcode, + const struct prog_instruction *fpi, + fs_reg dst, fs_reg src); + void emit_fp_alu2(enum opcode opcode, + const struct prog_instruction *fpi, + fs_reg dst, fs_reg src0, fs_reg src1); + void emit_fp_scalar_write(const struct prog_instruction *fpi, + fs_reg dst, fs_reg src); + void emit_fp_scalar_math(enum opcode opcode, + const struct prog_instruction *fpi, + fs_reg dst, fs_reg src); + + void emit_fp_minmax(const struct prog_instruction *fpi, + fs_reg dst, fs_reg src0, fs_reg src1); + + void emit_fp_sop(uint32_t conditional_mod, + const struct prog_instruction *fpi, + fs_reg dst, fs_reg src0, fs_reg src1, fs_reg one); + void emit_color_write(int target, int index, int first_color_mrf); void emit_fb_writes(); bool try_rewrite_rhs_to_dst(ir_assignment *ir, @@ -381,9 +404,12 @@ public: int max_grf; int urb_setup[FRAG_ATTRIB_MAX]; + fs_reg *fp_temp_regs; + fs_reg *fp_input_regs; + /** @{ debug annotation info */ const char *current_annotation; - ir_instruction *base_ir; + const void *base_ir; /** @} */ bool failed; diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp index e477a6168a9..aa60ed571da 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp @@ -726,11 +726,16 @@ fs_visitor::generate_code() { int last_native_insn_offset = p->next_insn_offset; const char *last_annotation_string = NULL; - ir_instruction *last_annotation_ir = NULL; + const void *last_annotation_ir = NULL; if (unlikely(INTEL_DEBUG & DEBUG_WM)) { - printf("Native code for fragment shader %d (%d-wide dispatch):\n", - prog->Name, c->dispatch_width); + if (shader) { + printf("Native code for fragment shader %d (%d-wide dispatch):\n", + prog->Name, c->dispatch_width); + } else { + printf("Native code for fragment program %d (%d-wide dispatch):\n", + c->fp->program.Base.Id, c->dispatch_width); + } } fs_cfg *cfg = NULL; @@ -762,7 +767,16 @@ fs_visitor::generate_code() last_annotation_ir = inst->ir; if (last_annotation_ir) { printf(" "); - last_annotation_ir->print(); + if (shader) + ((ir_instruction *)inst->ir)->print(); + else { + const prog_instruction *fpi; + fpi = (const prog_instruction *)inst->ir; + printf("%d: ", (int)(fpi - fp->Base.Instructions)); + _mesa_fprint_instruction_opt(stdout, + fpi, + 0, PROG_PRINT_DEBUG, NULL); + } printf("\n"); } } diff --git a/src/mesa/drivers/dri/i965/brw_fs_fp.cpp b/src/mesa/drivers/dri/i965/brw_fs_fp.cpp new file mode 100644 index 00000000000..be00f6ea8f5 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_fs_fp.cpp @@ -0,0 +1,784 @@ +/* + * Copyright © 2012 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** @file brw_fs_fp.cpp + * + * Implementation of the compiler for GL_ARB_fragment_program shaders on top + * of the GLSL compiler backend. + */ + +#include "brw_context.h" +#include "brw_fs.h" + +static fs_reg +regoffset(fs_reg reg, int i) +{ + reg.reg_offset += i; + return reg; +} + +void +fs_visitor::emit_fp_alu1(enum opcode opcode, + const struct prog_instruction *fpi, + fs_reg dst, fs_reg src) +{ + for (int i = 0; i < 4; i++) { + if (fpi->DstReg.WriteMask & (1 << i)) + emit(opcode, regoffset(dst, i), regoffset(src, i)); + } +} + +void +fs_visitor::emit_fp_alu2(enum opcode opcode, + const struct prog_instruction *fpi, + fs_reg dst, fs_reg src0, fs_reg src1) +{ + for (int i = 0; i < 4; i++) { + if (fpi->DstReg.WriteMask & (1 << i)) + emit(opcode, regoffset(dst, i), + regoffset(src0, i), regoffset(src1, i)); + } +} + +void +fs_visitor::emit_fp_minmax(const prog_instruction *fpi, + fs_reg dst, fs_reg src0, fs_reg src1) +{ + uint32_t conditionalmod; + if (fpi->Opcode == OPCODE_MIN) + conditionalmod = BRW_CONDITIONAL_L; + else + conditionalmod = BRW_CONDITIONAL_GE; + + for (int i = 0; i < 4; i++) { + if (fpi->DstReg.WriteMask & (1 << i)) { + emit_minmax(conditionalmod, regoffset(dst, i), + regoffset(src0, i), regoffset(src1, i)); + } + } +} + +void +fs_visitor::emit_fp_sop(uint32_t conditional_mod, + const struct prog_instruction *fpi, + fs_reg dst, fs_reg src0, fs_reg src1, + fs_reg one) +{ + for (int i = 0; i < 4; i++) { + if (fpi->DstReg.WriteMask & (1 << i)) { + fs_inst *inst; + + inst = emit(BRW_OPCODE_CMP, fs_reg(brw_null_reg()), + regoffset(src0, i), regoffset(src1, i)); + inst->conditional_mod = conditional_mod; + + inst = emit(BRW_OPCODE_SEL, regoffset(dst, i), one, fs_reg(0.0f)); + inst->predicated = true; + } + } +} + +void +fs_visitor::emit_fp_scalar_write(const struct prog_instruction *fpi, + fs_reg dst, fs_reg src) +{ + for (int i = 0; i < 4; i++) { + if (fpi->DstReg.WriteMask & (1 << i)) + emit(BRW_OPCODE_MOV, regoffset(dst, i), src); + } +} + +void +fs_visitor::emit_fp_scalar_math(enum opcode opcode, + const struct prog_instruction *fpi, + fs_reg dst, fs_reg src) +{ + fs_reg temp = fs_reg(this, glsl_type::float_type); + emit_math(opcode, temp, src); + emit_fp_scalar_write(fpi, dst, temp); +} + +void +fs_visitor::emit_fragment_program_code() +{ + setup_fp_regs(); + + fs_reg null = fs_reg(brw_null_reg()); + + /* Keep a reg with 1.0 around, for reuse by emit_fp_sop so that it can just + * be: + * + * sel.f0 dst 1.0 0.0 + * + * instead of + * + * mov dst 0.0 + * mov.f0 dst 1.0 + */ + fs_reg one = fs_reg(this, glsl_type::float_type); + emit(BRW_OPCODE_MOV, one, fs_reg(1.0f)); + + for (unsigned int insn = 0; insn < fp->Base.NumInstructions; insn++) { + const struct prog_instruction *fpi = &fp->Base.Instructions[insn]; + base_ir = fpi; + + //_mesa_print_instruction(fpi); + + fs_reg dst; + fs_reg src[3]; + + /* We always emit into a temporary destination register to avoid + * aliasing issues. + */ + dst = fs_reg(this, glsl_type::vec4_type); + + for (int i = 0; i < 3; i++) + src[i] = get_fp_src_reg(&fpi->SrcReg[i]); + + switch (fpi->Opcode) { + case OPCODE_ABS: + src[0].abs = true; + src[0].negate = false; + emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]); + break; + + case OPCODE_ADD: + emit_fp_alu2(BRW_OPCODE_ADD, fpi, dst, src[0], src[1]); + break; + + case OPCODE_CMP: + for (int i = 0; i < 4; i++) { + if (fpi->DstReg.WriteMask & (1 << i)) { + fs_inst *inst; + + inst = emit(BRW_OPCODE_CMP, null, + regoffset(src[0], i), fs_reg(0.0f)); + inst->conditional_mod = BRW_CONDITIONAL_L; + + inst = emit(BRW_OPCODE_SEL, regoffset(dst, i), + regoffset(src[1], i), regoffset(src[2], i)); + inst->predicated = true; + } + } + break; + + case OPCODE_COS: + emit_fp_scalar_math(SHADER_OPCODE_COS, fpi, dst, src[0]); + break; + + case OPCODE_DP2: + case OPCODE_DP3: + case OPCODE_DP4: + case OPCODE_DPH: { + fs_reg mul = fs_reg(this, glsl_type::float_type); + fs_reg acc = fs_reg(this, glsl_type::float_type); + int count; + + switch (fpi->Opcode) { + case OPCODE_DP2: count = 2; break; + case OPCODE_DP3: count = 3; break; + case OPCODE_DP4: count = 4; break; + case OPCODE_DPH: count = 3; break; + default: assert(!"not reached"); count = 0; break; + } + + emit(BRW_OPCODE_MUL, acc, + regoffset(src[0], 0), regoffset(src[1], 0)); + for (int i = 1; i < count; i++) { + emit(BRW_OPCODE_MUL, mul, + regoffset(src[0], i), regoffset(src[1], i)); + emit(BRW_OPCODE_ADD, acc, acc, mul); + } + + if (fpi->Opcode == OPCODE_DPH) + emit(BRW_OPCODE_ADD, acc, acc, regoffset(src[1], 3)); + + emit_fp_scalar_write(fpi, dst, acc); + break; + } + + case OPCODE_DST: + if (fpi->DstReg.WriteMask & WRITEMASK_X) + emit(BRW_OPCODE_MOV, dst, fs_reg(1.0f)); + if (fpi->DstReg.WriteMask & WRITEMASK_Y) { + emit(BRW_OPCODE_MUL, regoffset(dst, 1), + regoffset(src[0], 1), regoffset(src[1], 1)); + } + if (fpi->DstReg.WriteMask & WRITEMASK_Z) + emit(BRW_OPCODE_MOV, regoffset(dst, 2), regoffset(src[0], 2)); + if (fpi->DstReg.WriteMask & WRITEMASK_W) + emit(BRW_OPCODE_MOV, regoffset(dst, 3), regoffset(src[1], 3)); + break; + + case OPCODE_EX2: + emit_fp_scalar_math(SHADER_OPCODE_EXP2, fpi, dst, src[0]); + break; + + case OPCODE_FLR: + emit_fp_alu1(BRW_OPCODE_RNDD, fpi, dst, src[0]); + break; + + case OPCODE_FRC: + emit_fp_alu1(BRW_OPCODE_FRC, fpi, dst, src[0]); + break; + + case OPCODE_KIL: { + for (int i = 0; i < 4; i++) { + /* In most cases the argument to a KIL will be something like + * TEMP[0].wwww, so there's no point in checking whether .w is < 0 + * 4 times in a row. + */ + if (i > 0 && + GET_SWZ(fpi->SrcReg[0].Swizzle, i) == + GET_SWZ(fpi->SrcReg[0].Swizzle, i - 1) && + ((fpi->SrcReg[0].Negate >> i) & 1) == + ((fpi->SrcReg[0].Negate >> (i - 1)) & 1)) { + continue; + } + + fs_inst *inst = emit(BRW_OPCODE_CMP, null, + regoffset(src[0], i), 0.0f); + inst->conditional_mod = BRW_CONDITIONAL_L; + + inst = emit(BRW_OPCODE_IF); + inst->predicated = true; + emit(FS_OPCODE_DISCARD); + emit(BRW_OPCODE_ENDIF); + } + break; + } + + case OPCODE_LG2: + emit_fp_scalar_math(SHADER_OPCODE_LOG2, fpi, dst, src[0]); + break; + + case OPCODE_LIT: + /* From the ARB_fragment_program spec: + * + * tmp = VectorLoad(op0); + * if (tmp.x < 0) tmp.x = 0; + * if (tmp.y < 0) tmp.y = 0; + * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon); + * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon; + * result.x = 1.0; + * result.y = tmp.x; + * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0; + * result.w = 1.0; + * + * Note that we don't do the clamping to +/- 128. We didn't in + * brw_wm_emit.c either. + */ + if (fpi->DstReg.WriteMask & WRITEMASK_X) + emit(BRW_OPCODE_MOV, regoffset(dst, 0), fs_reg(1.0f)); + + if (fpi->DstReg.WriteMask & WRITEMASK_YZ) { + fs_inst *inst; + inst = emit(BRW_OPCODE_CMP, null, + regoffset(src[0], 0), fs_reg(0.0f)); + inst->conditional_mod = BRW_CONDITIONAL_LE; + + if (fpi->DstReg.WriteMask & WRITEMASK_Y) { + emit(BRW_OPCODE_MOV, regoffset(dst, 1), regoffset(src[0], 0)); + inst = emit(BRW_OPCODE_MOV, regoffset(dst, 1), fs_reg(0.0f)); + inst->predicated = true; + } + + if (fpi->DstReg.WriteMask & WRITEMASK_Z) { + emit_math(SHADER_OPCODE_POW, regoffset(dst, 2), + regoffset(src[0], 1), regoffset(src[0], 3)); + + inst = emit(BRW_OPCODE_MOV, regoffset(dst, 2), fs_reg(0.0f)); + inst->predicated = true; + } + } + + if (fpi->DstReg.WriteMask & WRITEMASK_W) + emit(BRW_OPCODE_MOV, regoffset(dst, 3), fs_reg(1.0f)); + + break; + + case OPCODE_LRP: + for (int i = 0; i < 4; i++) { + if (fpi->DstReg.WriteMask & (1 << i)) { + fs_reg neg_src0 = regoffset(src[0], i); + neg_src0.negate = !neg_src0.negate; + fs_reg temp = fs_reg(this, glsl_type::float_type); + fs_reg temp2 = fs_reg(this, glsl_type::float_type); + emit(BRW_OPCODE_ADD, temp, neg_src0, fs_reg(1.0f)); + emit(BRW_OPCODE_MUL, temp, temp, regoffset(src[2], i)); + emit(BRW_OPCODE_MUL, temp2, + regoffset(src[0], i), regoffset(src[1], i)); + emit(BRW_OPCODE_ADD, regoffset(dst, i), temp, temp2); + } + } + break; + + case OPCODE_MAD: + for (int i = 0; i < 4; i++) { + if (fpi->DstReg.WriteMask & (1 << i)) { + fs_reg temp = fs_reg(this, glsl_type::float_type); + emit(BRW_OPCODE_MUL, temp, + regoffset(src[0], i), regoffset(src[1], i)); + emit(BRW_OPCODE_ADD, regoffset(dst, i), + temp, regoffset(src[2], i)); + } + } + break; + + case OPCODE_MAX: + emit_fp_minmax(fpi, dst, src[0], src[1]); + break; + + case OPCODE_MOV: + emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]); + break; + + case OPCODE_MIN: + emit_fp_minmax(fpi, dst, src[0], src[1]); + break; + + case OPCODE_MUL: + emit_fp_alu2(BRW_OPCODE_MUL, fpi, dst, src[0], src[1]); + break; + + case OPCODE_POW: { + fs_reg temp = fs_reg(this, glsl_type::float_type); + emit_math(SHADER_OPCODE_POW, temp, src[0], src[1]); + emit_fp_scalar_write(fpi, dst, temp); + break; + } + + case OPCODE_RCP: + emit_fp_scalar_math(SHADER_OPCODE_RCP, fpi, dst, src[0]); + break; + + case OPCODE_RSQ: + emit_fp_scalar_math(SHADER_OPCODE_RSQ, fpi, dst, src[0]); + break; + + case OPCODE_SCS: + if (fpi->DstReg.WriteMask & WRITEMASK_X) { + emit_math(SHADER_OPCODE_COS, regoffset(dst, 0), + regoffset(src[0], 0)); + } + + if (fpi->DstReg.WriteMask & WRITEMASK_Y) { + emit_math(SHADER_OPCODE_SIN, regoffset(dst, 1), + regoffset(src[0], 1)); + } + break; + + case OPCODE_SGE: + emit_fp_sop(BRW_CONDITIONAL_GE, fpi, dst, src[0], src[1], one); + break; + + case OPCODE_SIN: + emit_fp_scalar_math(SHADER_OPCODE_SIN, fpi, dst, src[0]); + break; + + case OPCODE_SLT: + emit_fp_sop(BRW_CONDITIONAL_L, fpi, dst, src[0], src[1], one); + break; + + case OPCODE_SUB: { + fs_reg neg_src1 = src[1]; + neg_src1.negate = !src[1].negate; + + emit_fp_alu2(BRW_OPCODE_ADD, fpi, dst, src[0], neg_src1); + break; + } + + case OPCODE_TEX: + case OPCODE_TXB: + case OPCODE_TXP: { + /* We piggy-back on the GLSL IR support for texture setup. To do so, + * we have to cook up an ir_texture that has the coordinate field + * with appropriate type, and shadow_comparitor set or not. All the + * other properties of ir_texture are passed in as arguments to the + * emit_texture_gen* function. + */ + ir_texture *ir = NULL; + + fs_reg lod; + fs_reg dpdy; + fs_reg coordinate = src[0]; + fs_reg shadow_c; + + switch (fpi->Opcode) { + case OPCODE_TEX: + ir = new(mem_ctx) ir_texture(ir_tex); + break; + case OPCODE_TXP: { + ir = new(mem_ctx) ir_texture(ir_tex); + + coordinate = fs_reg(this, glsl_type::vec3_type); + fs_reg invproj = fs_reg(this, glsl_type::float_type); + emit_math(SHADER_OPCODE_RCP, invproj, regoffset(src[0], 3)); + for (int i = 0; i < 3; i++) { + emit(BRW_OPCODE_MUL, regoffset(coordinate, i), + regoffset(src[0], i), invproj); + } + break; + } + case OPCODE_TXB: + ir = new(mem_ctx) ir_texture(ir_txb); + lod = regoffset(src[0], 3); + break; + default: + assert(!"not reached"); + break; + } + + const glsl_type *coordinate_type; + switch (fpi->TexSrcTarget) { + case TEXTURE_1D_INDEX: + coordinate_type = glsl_type::float_type; + break; + + case TEXTURE_2D_INDEX: + case TEXTURE_1D_ARRAY_INDEX: + case TEXTURE_RECT_INDEX: + case TEXTURE_EXTERNAL_INDEX: + coordinate_type = glsl_type::vec2_type; + break; + + case TEXTURE_3D_INDEX: + case TEXTURE_2D_ARRAY_INDEX: + coordinate_type = glsl_type::vec3_type; + break; + + case TEXTURE_CUBE_INDEX: { + coordinate_type = glsl_type::vec3_type; + + fs_reg temp = fs_reg(this, glsl_type::float_type); + fs_reg cubecoord = fs_reg(this, glsl_type::vec3_type); + fs_reg abscoord = coordinate; + abscoord.negate = false; + abscoord.abs = true; + emit_minmax(BRW_CONDITIONAL_GE, temp, + regoffset(abscoord, 0), regoffset(abscoord, 1)); + emit_minmax(BRW_CONDITIONAL_GE, temp, + temp, regoffset(abscoord, 2)); + emit_math(SHADER_OPCODE_RCP, temp, temp); + for (int i = 0; i < 3; i++) { + emit(BRW_OPCODE_MUL, regoffset(cubecoord, i), + regoffset(coordinate, i), temp); + } + + coordinate = cubecoord; + break; + } + + default: + assert(!"not reached"); + coordinate_type = glsl_type::vec2_type; + break; + } + + ir_constant_data junk_data; + ir->coordinate = new(mem_ctx) ir_constant(coordinate_type, &junk_data); + + coordinate = rescale_texcoord(ir, coordinate, + fpi->TexSrcTarget == TEXTURE_RECT_INDEX, + fpi->TexSrcUnit, fpi->TexSrcUnit); + + if (fpi->TexShadow) { + shadow_c = regoffset(coordinate, 2); + ir->shadow_comparitor = new(mem_ctx) ir_constant(0.0f); + } + + fs_inst *inst; + if (intel->gen >= 7) { + inst = emit_texture_gen7(ir, dst, coordinate, shadow_c, lod, dpdy); + } else if (intel->gen >= 5) { + inst = emit_texture_gen5(ir, dst, coordinate, shadow_c, lod, dpdy); + } else { + inst = emit_texture_gen4(ir, dst, coordinate, shadow_c, lod, dpdy); + } + + inst->sampler = fpi->TexSrcUnit; + inst->shadow_compare = fpi->TexShadow; + + /* Reuse the GLSL swizzle_result() handler. */ + swizzle_result(ir, dst, fpi->TexSrcUnit); + dst = this->result; + + break; + } + + case OPCODE_SWZ: + /* Note that SWZ's extended swizzles are handled in the general + * get_src_reg() code. + */ + emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]); + break; + + case OPCODE_XPD: + for (int i = 0; i < 3; i++) { + if (fpi->DstReg.WriteMask & (1 << i)) { + int i1 = (i + 1) % 3; + int i2 = (i + 2) % 3; + + fs_reg temp = fs_reg(this, glsl_type::float_type); + fs_reg neg_src1_1 = regoffset(src[1], i1); + neg_src1_1.negate = !neg_src1_1.negate; + emit(BRW_OPCODE_MUL, temp, + regoffset(src[0], i2), neg_src1_1); + emit(BRW_OPCODE_MUL, regoffset(dst, i), + regoffset(src[0], i1), regoffset(src[1], i2)); + emit(BRW_OPCODE_ADD, regoffset(dst, i), + regoffset(dst, i), temp); + } + } + break; + + case OPCODE_END: + break; + + default: + _mesa_problem(ctx, "Unsupported opcode %s in fragment program\n", + _mesa_opcode_string(fpi->Opcode)); + } + + /* To handle saturates, we emit a MOV with a saturate bit, which + * optimization should fold into the preceding instructions when safe. + */ + if (fpi->Opcode != OPCODE_END) { + fs_reg real_dst = get_fp_dst_reg(&fpi->DstReg); + + for (int i = 0; i < 4; i++) { + if (fpi->DstReg.WriteMask & (1 << i)) { + fs_inst *inst = emit(BRW_OPCODE_MOV, + regoffset(real_dst, i), + regoffset(dst, i)); + inst->saturate = fpi->SaturateMode; + } + } + } + } + + /* Epilogue: + * + * Fragment depth has this strange convention of being the .z component of + * a vec4. emit_fb_write() wants to see a float value, instead. + */ + this->current_annotation = "result.depth write"; + if (frag_depth.file != BAD_FILE) { + fs_reg temp = fs_reg(this, glsl_type::float_type); + emit(BRW_OPCODE_MOV, temp, regoffset(frag_depth, 2)); + frag_depth = temp; + } +} + +void +fs_visitor::setup_fp_regs() +{ + /* PROGRAM_TEMPORARY */ + int num_temp = fp->Base.NumTemporaries; + fp_temp_regs = rzalloc_array(mem_ctx, fs_reg, num_temp); + for (int i = 0; i < num_temp; i++) + fp_temp_regs[i] = fs_reg(this, glsl_type::vec4_type); + + /* PROGRAM_STATE_VAR, PROGRAM_NAMED_PARAM, etc. */ + if (c->dispatch_width == 8) { + for (unsigned p = 0; + p < c->fp->program.Base.Parameters->NumParameters; p++) { + for (unsigned int i = 0; i < 4; i++) { + this->param_index[c->prog_data.nr_params] = p; + this->param_offset[c->prog_data.nr_params] = i; + c->prog_data.nr_params++; + } + } + } + + fp_input_regs = rzalloc_array(mem_ctx, fs_reg, FRAG_ATTRIB_MAX); + for (int i = 0; i < FRAG_ATTRIB_MAX; i++) { + if (fp->Base.InputsRead & BITFIELD64_BIT(i)) { + /* Make up a dummy instruction to reuse code for emitting + * interpolation. + */ + ir_variable *ir = new(mem_ctx) ir_variable(glsl_type::vec4_type, + "fp_input", + ir_var_in); + ir->location = i; + + this->current_annotation = ralloc_asprintf(ctx, "interpolate input %d", + i); + + switch (i) { + case FRAG_ATTRIB_WPOS: + ir->pixel_center_integer = fp->PixelCenterInteger; + ir->origin_upper_left = fp->OriginUpperLeft; + fp_input_regs[i] = *emit_fragcoord_interpolation(ir); + break; + case FRAG_ATTRIB_FACE: + fp_input_regs[i] = *emit_frontfacing_interpolation(ir); + break; + default: + fp_input_regs[i] = *emit_general_interpolation(ir); + + if (i == FRAG_ATTRIB_FOGC) { + emit(BRW_OPCODE_MOV, + regoffset(fp_input_regs[i], 1), fs_reg(0.0f)); + emit(BRW_OPCODE_MOV, + regoffset(fp_input_regs[i], 2), fs_reg(0.0f)); + emit(BRW_OPCODE_MOV, + regoffset(fp_input_regs[i], 3), fs_reg(1.0f)); + } + + break; + } + + this->current_annotation = NULL; + } + } +} + +fs_reg +fs_visitor::get_fp_dst_reg(const prog_dst_register *dst) +{ + switch (dst->File) { + case PROGRAM_TEMPORARY: + return fp_temp_regs[dst->Index]; + + case PROGRAM_OUTPUT: + if (dst->Index == FRAG_RESULT_DEPTH) { + if (frag_depth.file == BAD_FILE) + frag_depth = fs_reg(this, glsl_type::vec4_type); + return frag_depth; + } else if (dst->Index == FRAG_RESULT_COLOR) { + if (outputs[0].file == BAD_FILE) { + outputs[0] = fs_reg(this, glsl_type::vec4_type); + output_components[0] = 4; + + /* Tell emit_fb_writes() to smear fragment.color across all the + * color attachments. + */ + for (int i = 1; i < c->key.nr_color_regions; i++) { + outputs[i] = outputs[0]; + output_components[i] = output_components[0]; + } + } + return outputs[0]; + } else { + int output_index = dst->Index - FRAG_RESULT_DATA0; + if (outputs[output_index].file == BAD_FILE) { + outputs[output_index] = fs_reg(this, glsl_type::vec4_type); + } + output_components[output_index] = 4; + return outputs[output_index]; + } + + case PROGRAM_UNDEFINED: + return fs_reg(); + + default: + _mesa_problem(ctx, "bad dst register file: %s\n", + _mesa_register_file_name((gl_register_file)dst->File)); + return fs_reg(this, glsl_type::vec4_type); + } +} + +fs_reg +fs_visitor::get_fp_src_reg(const prog_src_register *src) +{ + struct gl_program_parameter_list *plist = c->fp->program.Base.Parameters; + + fs_reg result; + + assert(!src->Abs); + + switch (src->File) { + case PROGRAM_UNDEFINED: + return fs_reg(); + case PROGRAM_TEMPORARY: + result = fp_temp_regs[src->Index]; + break; + + case PROGRAM_INPUT: + result = fp_input_regs[src->Index]; + break; + + case PROGRAM_STATE_VAR: + case PROGRAM_UNIFORM: + case PROGRAM_CONSTANT: + case PROGRAM_NAMED_PARAM: + /* We actually want to look at the type in the Parameters list for this, + * because this lets us upload constant builtin uniforms, as actual + * constants. + */ + switch (plist->Parameters[src->Index].Type) { + case PROGRAM_NAMED_PARAM: + case PROGRAM_CONSTANT: { + result = fs_reg(this, glsl_type::vec4_type); + + for (int i = 0; i < 4; i++) { + emit(BRW_OPCODE_MOV, regoffset(result, i), + fs_reg(plist->ParameterValues[src->Index][i].f)); + } + break; + } + + case PROGRAM_STATE_VAR: + case PROGRAM_UNIFORM: + result = fs_reg(UNIFORM, src->Index * 4); + break; + + default: + _mesa_problem(ctx, "bad uniform src register file: %s\n", + _mesa_register_file_name((gl_register_file)src->File)); + return fs_reg(this, glsl_type::vec4_type); + } + break; + + default: + _mesa_problem(ctx, "bad src register file: %s\n", + _mesa_register_file_name((gl_register_file)src->File)); + return fs_reg(this, glsl_type::vec4_type); + } + + if (src->Swizzle != SWIZZLE_NOOP || src->Negate) { + fs_reg unswizzled = result; + result = fs_reg(this, glsl_type::vec4_type); + for (int i = 0; i < 4; i++) { + bool negate = src->Negate & (1 << i); + /* The ZERO, ONE, and Negate options are only used for OPCODE_SWZ, + * but it costs us nothing to support it. + */ + int src_swiz = GET_SWZ(src->Swizzle, i); + if (src_swiz == SWIZZLE_ZERO) { + emit(BRW_OPCODE_MOV, regoffset(result, i), fs_reg(0.0f)); + } else if (src_swiz == SWIZZLE_ONE) { + emit(BRW_OPCODE_MOV, regoffset(result, i), + negate ? fs_reg(-1.0f) : fs_reg(1.0f)); + } else { + fs_reg src = regoffset(unswizzled, src_swiz); + if (negate) + src.negate = !src.negate; + emit(BRW_OPCODE_MOV, regoffset(result, i), src); + } + } + } + + return result; +} diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 134238d42a6..4603035792d 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -2228,8 +2228,7 @@ fs_visitor::fs_visitor(struct brw_wm_compile *c, struct gl_shader_program *prog, this->c = c; this->p = &c->func; this->brw = p->brw; - this->fp = (struct gl_fragment_program *) - prog->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program; + this->fp = &c->fp->program; this->prog = prog; this->intel = &brw->intel; this->ctx = &intel->ctx; diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index f8eb54fce1d..fa0f684a626 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -85,46 +85,6 @@ GLuint brw_wm_is_scalar_result( GLuint opcode ) } } - -/** - * Do GPU code generation for non-GLSL shader. non-GLSL shaders have - * no flow control instructions so we can more readily do SSA-style - * optimizations. - */ -static void -brw_wm_non_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) -{ - /* Augment fragment program. Add instructions for pre- and - * post-fragment-program tasks such as interpolation and fogging. - */ - brw_wm_pass_fp(c); - - /* Translate to intermediate representation. Build register usage - * chains. - */ - brw_wm_pass0(c); - - /* Dead code removal. - */ - brw_wm_pass1(c); - - /* Register allocation. - * Divide by two because we operate on 16 pixels at a time and require - * two GRF entries for each logical shader register. - */ - c->grf_limit = BRW_WM_MAX_GRF / 2; - - brw_wm_pass2(c); - - /* how many general-purpose registers are used */ - c->prog_data.reg_blocks = brw_register_blocks(c->max_wm_grf); - - /* Emit GEN4 code. - */ - brw_wm_emit(c); -} - - /** * Return a bitfield where bit n is set if barycentric interpolation mode n * (see enum brw_wm_barycentric_interp_mode) is needed by the fragment shader. @@ -356,23 +316,7 @@ bool do_wm_prog(struct brw_context *brw, brw_compute_barycentric_interp_modes(brw, c->key.flat_shade, &fp->program); - if (prog && prog->_LinkedShaders[MESA_SHADER_FRAGMENT]) { - if (!brw_wm_fs_emit(brw, c, prog)) - return false; - } else { - if (!c->instruction) { - c->instruction = rzalloc_array(c, struct brw_wm_instruction, BRW_WM_MAX_INSN); - c->prog_instructions = rzalloc_array(c, struct prog_instruction, BRW_WM_MAX_INSN); - c->vreg = rzalloc_array(c, struct brw_wm_value, BRW_WM_MAX_VREG); - c->refs = rzalloc_array(c, struct brw_wm_ref, BRW_WM_MAX_REF); - } - - /* Fallback for fixed function and ARB_fp shaders. */ - c->dispatch_width = 16; - brw_wm_payload_setup(brw, c); - brw_wm_non_glsl_emit(brw, c); - c->prog_data.dispatch_width = 16; - } + brw_wm_fs_emit(brw, c, prog); /* Scratch space is used for register spilling */ if (c->last_scratch) { diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index dd67795e743..ea2dea92a70 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -163,23 +163,8 @@ brw_upload_wm_unit(struct brw_context *brw) /* _NEW_COLOR */ wm->wm5.program_uses_killpixel = fp->UsesKill || ctx->Color.AlphaEnabled; - - /* BRW_NEW_FRAGMENT_PROGRAM - * - * If using the fragment shader backend, the program is always - * 8-wide. If not, it's always 16. - */ - if (ctx->Shader._CurrentFragmentProgram) { - struct brw_shader *shader = (struct brw_shader *) - ctx->Shader._CurrentFragmentProgram->_LinkedShaders[MESA_SHADER_FRAGMENT]; - - if (shader != NULL && shader->ir != NULL) { - wm->wm5.enable_8_pix = 1; - if (brw->wm.prog_data->prog_offset_16) - wm->wm5.enable_16_pix = 1; - } - } - if (!wm->wm5.enable_8_pix) + wm->wm5.enable_8_pix = 1; + if (brw->wm.prog_data->prog_offset_16) wm->wm5.enable_16_pix = 1; wm->wm5.max_threads = brw->max_wm_threads - 1; diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c index dd435286dee..bd28f97add4 100644 --- a/src/mesa/drivers/dri/i965/gen6_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c @@ -151,13 +151,9 @@ upload_wm_state(struct brw_context *brw) dw5 |= (brw->max_wm_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT; /* CACHE_NEW_WM_PROG */ - if (brw->wm.prog_data->dispatch_width == 8) { - dw5 |= GEN6_WM_8_DISPATCH_ENABLE; - if (brw->wm.prog_data->prog_offset_16) - dw5 |= GEN6_WM_16_DISPATCH_ENABLE; - } else { + dw5 |= GEN6_WM_8_DISPATCH_ENABLE; + if (brw->wm.prog_data->prog_offset_16) dw5 |= GEN6_WM_16_DISPATCH_ENABLE; - } /* CACHE_NEW_WM_PROG | _NEW_COLOR */ if (brw->wm.prog_data->dual_src_blend && diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c index dc49a7dfe2a..e0c69113ada 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c @@ -196,13 +196,9 @@ upload_ps_state(struct brw_context *brw) if (brw->fragment_program->Base.InputsRead != 0) dw4 |= GEN7_PS_ATTRIBUTE_ENABLE; - if (brw->wm.prog_data->dispatch_width == 8) { - dw4 |= GEN7_PS_8_DISPATCH_ENABLE; - if (brw->wm.prog_data->prog_offset_16) - dw4 |= GEN7_PS_16_DISPATCH_ENABLE; - } else { + dw4 |= GEN7_PS_8_DISPATCH_ENABLE; + if (brw->wm.prog_data->prog_offset_16) dw4 |= GEN7_PS_16_DISPATCH_ENABLE; - } dw5 |= (brw->wm.prog_data->first_curbe_grf << GEN7_PS_DISPATCH_START_GRF_SHIFT_0);