i965: Add support for POW in gen6 FS.

Fixes glsl-algebraic-pow-2 in brw_wm_glsl.c mode.
This commit is contained in:
Eric Anholt 2010-08-22 01:33:57 -07:00 committed by Zhenyu Wang
parent 2f914053bc
commit fe2d4a5ea0
3 changed files with 121 additions and 26 deletions

View File

@ -885,6 +885,12 @@ void brw_math( struct brw_compile *p,
GLuint data_type,
GLuint precision );
/* Two-source extended math, float[8]; the definition asserts gen >= 6. */
void brw_math2(struct brw_compile *p,
struct brw_reg dest,
GLuint function,
struct brw_reg src0,
struct brw_reg src1);
void brw_dp_READ_16( struct brw_compile *p,
struct brw_reg dest,
GLuint scratch_offset );

View File

@ -1131,6 +1131,29 @@ void brw_math( struct brw_compile *p,
}
}
/**
 * Two-source extended math function, float[8].
 *
 * Takes the next instruction slot from p via next_insn() with opcode
 * BRW_OPCODE_MATH, records the math function code in the instruction
 * header, and then sets the destination and both source operands.
 *
 * Only valid on gen6 and later; this is enforced with an assert.
 */
void brw_math2(struct brw_compile *p,
               struct brw_reg dest,
               GLuint function,
               struct brw_reg src0,
               struct brw_reg src1)
{
   struct brw_instruction *math = next_insn(p, BRW_OPCODE_MATH);

   assert(p->brw->intel.gen >= 6);

   /* MATH shares the ISA layout of the other opcodes, with two fields
    * reinterpreted: CondModifier carries FC[3:0] and ThreadCtrl carries
    * FC[5:4].  Only the CondModifier field is written here.
    */
   math->header.destreg__conditionalmod = function;

   brw_set_dest(math, dest);
   brw_set_src0(math, src0);
   brw_set_src1(math, src1);
}
/**
* Extended math function, float[16].
* Use 2 send instructions.

View File

@ -918,10 +918,8 @@ void emit_math2(struct brw_wm_compile *c,
const struct brw_reg *arg1)
{
struct brw_compile *p = &c->func;
struct intel_context *intel = &p->brw->intel;
int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
GLuint saturate = ((mask & SATURATE) ?
BRW_MATH_SATURATE_SATURATE :
BRW_MATH_SATURATE_NONE);
if (!(mask & WRITEMASK_XYZW))
return; /* Do not emit dead code */
@ -930,35 +928,103 @@ void emit_math2(struct brw_wm_compile *c,
brw_push_insn_state(p);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_MOV(p, brw_message_reg(3), arg1[0]);
if (c->dispatch_width == 16) {
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
}
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_math(p,
dst[dst_chan],
function,
saturate,
2,
arg0[0],
BRW_MATH_DATA_VECTOR,
BRW_MATH_PRECISION_FULL);
/* Send two messages to perform all 16 operations:
/* math can only operate on up to a vec8 at a time, so in
* dispatch_width==16 we have to do the second half manually.
*/
if (c->dispatch_width == 16) {
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
if (intel->gen >= 6) {
struct brw_reg src0 = arg0[0];
struct brw_reg src1 = arg1[0];
struct brw_reg temp_dst = dst[dst_chan];
if (arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0) {
if (arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) {
/* Both scalar arguments. Do scalar calc. */
src0.hstride = BRW_HORIZONTAL_STRIDE_1;
src1.hstride = BRW_HORIZONTAL_STRIDE_1;
temp_dst.hstride = BRW_HORIZONTAL_STRIDE_1;
temp_dst.width = BRW_WIDTH_1;
if (arg0[0].subnr != 0) {
brw_MOV(p, temp_dst, src0);
src0 = temp_dst;
/* Ouch. We've used the temp as a dst, and we still
* need a temp to store arg1 in, because src and dst
* offsets have to be equal. Leaving this up to
* glsl2-965 to handle correctly.
*/
assert(arg1[0].subnr == 0);
} else if (arg1[0].subnr != 0) {
brw_MOV(p, temp_dst, src1);
src1 = temp_dst;
}
} else {
brw_MOV(p, temp_dst, src0);
src0 = temp_dst;
}
} else if (arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) {
brw_MOV(p, temp_dst, src1);
src1 = temp_dst;
}
brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_math2(p,
temp_dst,
function,
src0,
src1);
if (c->dispatch_width == 16) {
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
brw_math2(p,
sechalf(temp_dst),
function,
sechalf(src0),
sechalf(src1));
}
/* Splat a scalar result into all the channels. */
if (arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0 &&
arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) {
temp_dst.hstride = BRW_HORIZONTAL_STRIDE_0;
temp_dst.vstride = BRW_VERTICAL_STRIDE_0;
brw_MOV(p, dst[dst_chan], temp_dst);
}
} else {
GLuint saturate = ((mask & SATURATE) ?
BRW_MATH_SATURATE_SATURATE :
BRW_MATH_SATURATE_NONE);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_MOV(p, brw_message_reg(3), arg1[0]);
if (c->dispatch_width == 16) {
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
}
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_math(p,
offset(dst[dst_chan],1),
dst[dst_chan],
function,
saturate,
4,
sechalf(arg0[0]),
2,
arg0[0],
BRW_MATH_DATA_VECTOR,
BRW_MATH_PRECISION_FULL);
/* Send two messages to perform all 16 operations:
*/
if (c->dispatch_width == 16) {
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
brw_math(p,
offset(dst[dst_chan],1),
function,
saturate,
4,
sechalf(arg0[0]),
BRW_MATH_DATA_VECTOR,
BRW_MATH_PRECISION_FULL);
}
}
brw_pop_insn_state(p);
}