i965: Add support for POW in gen6 FS.
Fixes glsl-algebraic-pow-2 in brw_wm_glsl.c mode.
This commit is contained in:
parent
2f914053bc
commit
fe2d4a5ea0
|
@ -885,6 +885,12 @@ void brw_math( struct brw_compile *p,
|
|||
GLuint data_type,
|
||||
GLuint precision );
|
||||
|
||||
void brw_math2(struct brw_compile *p,
|
||||
struct brw_reg dest,
|
||||
GLuint function,
|
||||
struct brw_reg src0,
|
||||
struct brw_reg src1);
|
||||
|
||||
void brw_dp_READ_16( struct brw_compile *p,
|
||||
struct brw_reg dest,
|
||||
GLuint scratch_offset );
|
||||
|
|
|
@ -1131,6 +1131,29 @@ void brw_math( struct brw_compile *p,
|
|||
}
|
||||
}
|
||||
|
||||
/** Extended math function, float[8].
|
||||
*/
|
||||
void brw_math2(struct brw_compile *p,
|
||||
struct brw_reg dest,
|
||||
GLuint function,
|
||||
struct brw_reg src0,
|
||||
struct brw_reg src1)
|
||||
{
|
||||
struct intel_context *intel = &p->brw->intel;
|
||||
struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
|
||||
|
||||
assert(intel->gen >= 6);
|
||||
|
||||
/* Math is the same ISA format as other opcodes, except that CondModifier
|
||||
* becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
|
||||
*/
|
||||
insn->header.destreg__conditionalmod = function;
|
||||
|
||||
brw_set_dest(insn, dest);
|
||||
brw_set_src0(insn, src0);
|
||||
brw_set_src1(insn, src1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Extended math function, float[16].
|
||||
* Use 2 send instructions.
|
||||
|
|
|
@ -918,10 +918,8 @@ void emit_math2(struct brw_wm_compile *c,
|
|||
const struct brw_reg *arg1)
|
||||
{
|
||||
struct brw_compile *p = &c->func;
|
||||
struct intel_context *intel = &p->brw->intel;
|
||||
int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
|
||||
GLuint saturate = ((mask & SATURATE) ?
|
||||
BRW_MATH_SATURATE_SATURATE :
|
||||
BRW_MATH_SATURATE_NONE);
|
||||
|
||||
if (!(mask & WRITEMASK_XYZW))
|
||||
return; /* Do not emit dead code */
|
||||
|
@ -930,35 +928,103 @@ void emit_math2(struct brw_wm_compile *c,
|
|||
|
||||
brw_push_insn_state(p);
|
||||
|
||||
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
|
||||
brw_MOV(p, brw_message_reg(3), arg1[0]);
|
||||
if (c->dispatch_width == 16) {
|
||||
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
|
||||
brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
|
||||
}
|
||||
|
||||
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
|
||||
brw_math(p,
|
||||
dst[dst_chan],
|
||||
function,
|
||||
saturate,
|
||||
2,
|
||||
arg0[0],
|
||||
BRW_MATH_DATA_VECTOR,
|
||||
BRW_MATH_PRECISION_FULL);
|
||||
|
||||
/* Send two messages to perform all 16 operations:
|
||||
/* math can only operate on up to a vec8 at a time, so in
|
||||
* dispatch_width==16 we have to do the second half manually.
|
||||
*/
|
||||
if (c->dispatch_width == 16) {
|
||||
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
|
||||
if (intel->gen >= 6) {
|
||||
struct brw_reg src0 = arg0[0];
|
||||
struct brw_reg src1 = arg1[0];
|
||||
struct brw_reg temp_dst = dst[dst_chan];
|
||||
|
||||
if (arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0) {
|
||||
if (arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) {
|
||||
/* Both scalar arguments. Do scalar calc. */
|
||||
src0.hstride = BRW_HORIZONTAL_STRIDE_1;
|
||||
src1.hstride = BRW_HORIZONTAL_STRIDE_1;
|
||||
temp_dst.hstride = BRW_HORIZONTAL_STRIDE_1;
|
||||
temp_dst.width = BRW_WIDTH_1;
|
||||
|
||||
if (arg0[0].subnr != 0) {
|
||||
brw_MOV(p, temp_dst, src0);
|
||||
src0 = temp_dst;
|
||||
|
||||
/* Ouch. We've used the temp as a dst, and we still
|
||||
* need a temp to store arg1 in, because src and dst
|
||||
* offsets have to be equal. Leaving this up to
|
||||
* glsl2-965 to handle correctly.
|
||||
*/
|
||||
assert(arg1[0].subnr == 0);
|
||||
} else if (arg1[0].subnr != 0) {
|
||||
brw_MOV(p, temp_dst, src1);
|
||||
src1 = temp_dst;
|
||||
}
|
||||
} else {
|
||||
brw_MOV(p, temp_dst, src0);
|
||||
src0 = temp_dst;
|
||||
}
|
||||
} else if (arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) {
|
||||
brw_MOV(p, temp_dst, src1);
|
||||
src1 = temp_dst;
|
||||
}
|
||||
|
||||
brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
|
||||
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
|
||||
brw_math2(p,
|
||||
temp_dst,
|
||||
function,
|
||||
src0,
|
||||
src1);
|
||||
if (c->dispatch_width == 16) {
|
||||
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
|
||||
brw_math2(p,
|
||||
sechalf(temp_dst),
|
||||
function,
|
||||
sechalf(src0),
|
||||
sechalf(src1));
|
||||
}
|
||||
|
||||
/* Splat a scalar result into all the channels. */
|
||||
if (arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0 &&
|
||||
arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) {
|
||||
temp_dst.hstride = BRW_HORIZONTAL_STRIDE_0;
|
||||
temp_dst.vstride = BRW_VERTICAL_STRIDE_0;
|
||||
brw_MOV(p, dst[dst_chan], temp_dst);
|
||||
}
|
||||
} else {
|
||||
GLuint saturate = ((mask & SATURATE) ?
|
||||
BRW_MATH_SATURATE_SATURATE :
|
||||
BRW_MATH_SATURATE_NONE);
|
||||
|
||||
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
|
||||
brw_MOV(p, brw_message_reg(3), arg1[0]);
|
||||
if (c->dispatch_width == 16) {
|
||||
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
|
||||
brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
|
||||
}
|
||||
|
||||
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
|
||||
brw_math(p,
|
||||
offset(dst[dst_chan],1),
|
||||
dst[dst_chan],
|
||||
function,
|
||||
saturate,
|
||||
4,
|
||||
sechalf(arg0[0]),
|
||||
2,
|
||||
arg0[0],
|
||||
BRW_MATH_DATA_VECTOR,
|
||||
BRW_MATH_PRECISION_FULL);
|
||||
|
||||
/* Send two messages to perform all 16 operations:
|
||||
*/
|
||||
if (c->dispatch_width == 16) {
|
||||
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
|
||||
brw_math(p,
|
||||
offset(dst[dst_chan],1),
|
||||
function,
|
||||
saturate,
|
||||
4,
|
||||
sechalf(arg0[0]),
|
||||
BRW_MATH_DATA_VECTOR,
|
||||
BRW_MATH_PRECISION_FULL);
|
||||
}
|
||||
}
|
||||
brw_pop_insn_state(p);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue