i965/fs: Allow scalar source regions on SNB math instructions.
I haven't found any evidence that this isn't supported by the hardware. In fact, according to the SNB hardware spec:

"The supported regioning modes for math instructions are align16, align1 with the following restrictions:
 - Scalar source is supported.
 [...]
 - Source and destination offset must be the same, except the case of scalar source."

Cc: "12.0" <mesa-stable@lists.freedesktop.org>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Matt Turner <mattst88@gmail.com>
This commit is contained in:
parent
06d8765bc0
commit
c1107cec44
|
@ -2000,8 +2000,10 @@ void gen6_math(struct brw_codegen *p,
|
||||||
|
|
||||||
assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
|
assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
|
||||||
if (devinfo->gen == 6) {
|
if (devinfo->gen == 6) {
|
||||||
assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
|
assert(has_scalar_region(src0) ||
|
||||||
assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
|
src0.hstride == BRW_HORIZONTAL_STRIDE_1);
|
||||||
|
assert(has_scalar_region(src1) ||
|
||||||
|
src1.hstride == BRW_HORIZONTAL_STRIDE_1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
|
if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
|
||||||
|
|
|
@ -621,20 +621,14 @@ namespace brw {
|
||||||
src_reg
|
src_reg
|
||||||
fix_math_operand(const src_reg &src) const
|
fix_math_operand(const src_reg &src) const
|
||||||
{
|
{
|
||||||
/* Can't do hstride == 0 args on gen6 math, so expand it out. We
|
/* Gen6 hardware ignores source modifiers (negate and abs) on math
|
||||||
* might be able to do better by doing execsize = 1 math and then
|
|
||||||
* expanding that result out, but we would need to be careful with
|
|
||||||
* masking.
|
|
||||||
*
|
|
||||||
* Gen6 hardware ignores source modifiers (negate and abs) on math
|
|
||||||
* instructions, so we also move to a temp to set those up.
|
* instructions, so we also move to a temp to set those up.
|
||||||
*
|
*
|
||||||
* Gen7 relaxes most of the above restrictions, but still can't use IMM
|
* Gen7 relaxes most of the above restrictions, but still can't use IMM
|
||||||
* operands to math
|
* operands to math
|
||||||
*/
|
*/
|
||||||
if ((shader->devinfo->gen == 6 &&
|
if ((shader->devinfo->gen == 6 &&
|
||||||
(src.file == IMM || src.file == UNIFORM ||
|
(src.file == IMM || src.abs || src.negate)) ||
|
||||||
src.abs || src.negate)) ||
|
|
||||||
(shader->devinfo->gen == 7 && src.file == IMM)) {
|
(shader->devinfo->gen == 7 && src.file == IMM)) {
|
||||||
const dst_reg tmp = vgrf(src.type);
|
const dst_reg tmp = vgrf(src.type);
|
||||||
MOV(tmp, src);
|
MOV(tmp, src);
|
||||||
|
|
|
@ -578,14 +578,9 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
|
||||||
break;
|
break;
|
||||||
/* fallthrough */
|
/* fallthrough */
|
||||||
case SHADER_OPCODE_POW:
|
case SHADER_OPCODE_POW:
|
||||||
/* Allow constant propagation into src1 (except on Gen 6), and let
|
/* Allow constant propagation into src1, and let constant combining
|
||||||
* constant combining promote the constant on Gen < 8.
|
* promote the constant on Gen < 8.
|
||||||
*
|
|
||||||
* While Gen 6 MATH can take a scalar source, its source and
|
|
||||||
* destination offsets must be equal and we cannot ensure that.
|
|
||||||
*/
|
*/
|
||||||
if (devinfo->gen == 6)
|
|
||||||
break;
|
|
||||||
/* fallthrough */
|
/* fallthrough */
|
||||||
case BRW_OPCODE_BFI1:
|
case BRW_OPCODE_BFI1:
|
||||||
case BRW_OPCODE_ASR:
|
case BRW_OPCODE_ASR:
|
||||||
|
|
Loading…
Reference in New Issue