i965: Add support for POW in gen6 FS.
Fixes glsl-algebraic-pow-2 in brw_wm_glsl.c mode.
This commit is contained in:
parent
2f914053bc
commit
fe2d4a5ea0
|
@ -885,6 +885,12 @@ void brw_math( struct brw_compile *p,
|
||||||
GLuint data_type,
|
GLuint data_type,
|
||||||
GLuint precision );
|
GLuint precision );
|
||||||
|
|
||||||
|
/* Emit a two-source MATH instruction; the definition asserts intel->gen >= 6. */
void brw_math2(struct brw_compile *p,
|
||||||
|
struct brw_reg dest,
|
||||||
|
GLuint function,
|
||||||
|
struct brw_reg src0,
|
||||||
|
struct brw_reg src1);
|
||||||
|
|
||||||
void brw_dp_READ_16( struct brw_compile *p,
|
void brw_dp_READ_16( struct brw_compile *p,
|
||||||
struct brw_reg dest,
|
struct brw_reg dest,
|
||||||
GLuint scratch_offset );
|
GLuint scratch_offset );
|
||||||
|
|
|
@ -1131,6 +1131,29 @@ void brw_math( struct brw_compile *p,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Extended math function, float[8].
|
||||||
|
*
* Appends a single BRW_OPCODE_MATH instruction to the program in \p p that
* takes two source operands.
*
* \param p        compile state the instruction is appended to
* \param dest     destination register
* \param function math function selector, stored in the instruction header's
*                 destreg__conditionalmod field
* \param src0     first source operand
* \param src1     second source operand
*
* Asserts that the context's gen is at least 6.  Unlike brw_math(), this
* entry point has no saturate, data-type or precision parameters.
*/
|
||||||
|
void brw_math2(struct brw_compile *p,
|
||||||
|
struct brw_reg dest,
|
||||||
|
GLuint function,
|
||||||
|
struct brw_reg src0,
|
||||||
|
struct brw_reg src1)
|
||||||
|
{
|
||||||
|
struct intel_context *intel = &p->brw->intel;
|
||||||
|
struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
|
||||||
|
|
||||||
|
/* This instruction form is only emitted for gen6 and later. */
assert(intel->gen >= 6);
|
||||||
|
|
||||||
|
/* Math is the same ISA format as other opcodes, except that CondModifier
|
||||||
|
* becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
|
||||||
|
*/
|
||||||
|
insn->header.destreg__conditionalmod = function;
|
||||||
|
|
||||||
|
/* Fill in the destination and both source operands. */
brw_set_dest(insn, dest);
|
||||||
|
brw_set_src0(insn, src0);
|
||||||
|
brw_set_src1(insn, src1);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Extended math function, float[16].
|
* Extended math function, float[16].
|
||||||
* Use 2 send instructions.
|
* Use 2 send instructions.
|
||||||
|
|
|
@ -918,10 +918,8 @@ void emit_math2(struct brw_wm_compile *c,
|
||||||
const struct brw_reg *arg1)
|
const struct brw_reg *arg1)
|
||||||
{
|
{
|
||||||
struct brw_compile *p = &c->func;
|
struct brw_compile *p = &c->func;
|
||||||
|
struct intel_context *intel = &p->brw->intel;
|
||||||
int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
|
int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
|
||||||
GLuint saturate = ((mask & SATURATE) ?
|
|
||||||
BRW_MATH_SATURATE_SATURATE :
|
|
||||||
BRW_MATH_SATURATE_NONE);
|
|
||||||
|
|
||||||
if (!(mask & WRITEMASK_XYZW))
|
if (!(mask & WRITEMASK_XYZW))
|
||||||
return; /* Do not emit dead code */
|
return; /* Do not emit dead code */
|
||||||
|
@ -930,35 +928,103 @@ void emit_math2(struct brw_wm_compile *c,
|
||||||
|
|
||||||
brw_push_insn_state(p);
|
brw_push_insn_state(p);
|
||||||
|
|
||||||
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
|
/* math can only operate on up to a vec8 at a time, so in
|
||||||
brw_MOV(p, brw_message_reg(3), arg1[0]);
|
* dispatch_width==16 we have to do the second half manually.
|
||||||
if (c->dispatch_width == 16) {
|
|
||||||
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
|
|
||||||
brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
|
|
||||||
}
|
|
||||||
|
|
||||||
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
|
|
||||||
brw_math(p,
|
|
||||||
dst[dst_chan],
|
|
||||||
function,
|
|
||||||
saturate,
|
|
||||||
2,
|
|
||||||
arg0[0],
|
|
||||||
BRW_MATH_DATA_VECTOR,
|
|
||||||
BRW_MATH_PRECISION_FULL);
|
|
||||||
|
|
||||||
/* Send two messages to perform all 16 operations:
|
|
||||||
*/
|
*/
|
||||||
if (c->dispatch_width == 16) {
|
if (intel->gen >= 6) {
|
||||||
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
|
struct brw_reg src0 = arg0[0];
|
||||||
|
struct brw_reg src1 = arg1[0];
|
||||||
|
struct brw_reg temp_dst = dst[dst_chan];
|
||||||
|
|
||||||
|
if (arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0) {
|
||||||
|
if (arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) {
|
||||||
|
/* Both scalar arguments. Do scalar calc. */
|
||||||
|
src0.hstride = BRW_HORIZONTAL_STRIDE_1;
|
||||||
|
src1.hstride = BRW_HORIZONTAL_STRIDE_1;
|
||||||
|
temp_dst.hstride = BRW_HORIZONTAL_STRIDE_1;
|
||||||
|
temp_dst.width = BRW_WIDTH_1;
|
||||||
|
|
||||||
|
if (arg0[0].subnr != 0) {
|
||||||
|
brw_MOV(p, temp_dst, src0);
|
||||||
|
src0 = temp_dst;
|
||||||
|
|
||||||
|
/* Ouch. We've used the temp as a dst, and we still
|
||||||
|
* need a temp to store arg1 in, because src and dst
|
||||||
|
* offsets have to be equal. Leaving this up to
|
||||||
|
* glsl2-965 to handle correctly.
|
||||||
|
*/
|
||||||
|
assert(arg1[0].subnr == 0);
|
||||||
|
} else if (arg1[0].subnr != 0) {
|
||||||
|
brw_MOV(p, temp_dst, src1);
|
||||||
|
src1 = temp_dst;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
brw_MOV(p, temp_dst, src0);
|
||||||
|
src0 = temp_dst;
|
||||||
|
}
|
||||||
|
} else if (arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) {
|
||||||
|
brw_MOV(p, temp_dst, src1);
|
||||||
|
src1 = temp_dst;
|
||||||
|
}
|
||||||
|
|
||||||
|
brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
|
||||||
|
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
|
||||||
|
brw_math2(p,
|
||||||
|
temp_dst,
|
||||||
|
function,
|
||||||
|
src0,
|
||||||
|
src1);
|
||||||
|
if (c->dispatch_width == 16) {
|
||||||
|
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
|
||||||
|
brw_math2(p,
|
||||||
|
sechalf(temp_dst),
|
||||||
|
function,
|
||||||
|
sechalf(src0),
|
||||||
|
sechalf(src1));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Splat a scalar result into all the channels. */
|
||||||
|
if (arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0 &&
|
||||||
|
arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) {
|
||||||
|
temp_dst.hstride = BRW_HORIZONTAL_STRIDE_0;
|
||||||
|
temp_dst.vstride = BRW_VERTICAL_STRIDE_0;
|
||||||
|
brw_MOV(p, dst[dst_chan], temp_dst);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
GLuint saturate = ((mask & SATURATE) ?
|
||||||
|
BRW_MATH_SATURATE_SATURATE :
|
||||||
|
BRW_MATH_SATURATE_NONE);
|
||||||
|
|
||||||
|
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
|
||||||
|
brw_MOV(p, brw_message_reg(3), arg1[0]);
|
||||||
|
if (c->dispatch_width == 16) {
|
||||||
|
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
|
||||||
|
brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
|
||||||
|
}
|
||||||
|
|
||||||
|
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
|
||||||
brw_math(p,
|
brw_math(p,
|
||||||
offset(dst[dst_chan],1),
|
dst[dst_chan],
|
||||||
function,
|
function,
|
||||||
saturate,
|
saturate,
|
||||||
4,
|
2,
|
||||||
sechalf(arg0[0]),
|
arg0[0],
|
||||||
BRW_MATH_DATA_VECTOR,
|
BRW_MATH_DATA_VECTOR,
|
||||||
BRW_MATH_PRECISION_FULL);
|
BRW_MATH_PRECISION_FULL);
|
||||||
|
|
||||||
|
/* Send two messages to perform all 16 operations:
|
||||||
|
*/
|
||||||
|
if (c->dispatch_width == 16) {
|
||||||
|
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
|
||||||
|
brw_math(p,
|
||||||
|
offset(dst[dst_chan],1),
|
||||||
|
function,
|
||||||
|
saturate,
|
||||||
|
4,
|
||||||
|
sechalf(arg0[0]),
|
||||||
|
BRW_MATH_DATA_VECTOR,
|
||||||
|
BRW_MATH_PRECISION_FULL);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
brw_pop_insn_state(p);
|
brw_pop_insn_state(p);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue