svga: workaround for unexpected double swizzle

SM5 requires the swizzle of a 64-bit ALU source operand to be one of .xyzw,
.xyxy, .zwxy, or .zwzw. If a source's swizzle does not match one of these
patterns, first move the source, with its specified swizzle applied, into a
temporary register and use that temporary as the operand instead.

Reviewed-by: Neha Bhende <bhenden@vmware.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16464>
This commit is contained in:
Charmaine Lee 2022-05-10 22:23:13 -07:00 committed by Marge Bot
parent 5cfae66cde
commit 55a4bdb52f
1 changed files with 86 additions and 46 deletions

View File

@ -9236,7 +9236,21 @@ opcode_has_dbl_src(unsigned opcode)
case TGSI_OPCODE_D2I:
case TGSI_OPCODE_D2U:
case TGSI_OPCODE_DFMA:
// XXX more TBD
case TGSI_OPCODE_DSLT:
case TGSI_OPCODE_DSGE:
case TGSI_OPCODE_DSEQ:
case TGSI_OPCODE_DSNE:
case TGSI_OPCODE_DRCP:
case TGSI_OPCODE_DSQRT:
case TGSI_OPCODE_DMAD:
case TGSI_OPCODE_DLDEXP:
case TGSI_OPCODE_DFRACEXP:
case TGSI_OPCODE_DRSQ:
case TGSI_OPCODE_DTRUNC:
case TGSI_OPCODE_DCEIL:
case TGSI_OPCODE_DFLR:
case TGSI_OPCODE_DROUND:
case TGSI_OPCODE_DSSG:
return true;
default:
return false;
@ -9246,23 +9260,36 @@ opcode_has_dbl_src(unsigned opcode)
/**
* Check that the swizzle for reading from a double-precision register
* is valid.
* is valid. If not valid, move the source to a temporary register first.
*/
static void
check_double_src_swizzle(const struct tgsi_full_src_register *reg)
static struct tgsi_full_src_register
check_double_src(struct svga_shader_emitter_v10 *emit,
const struct tgsi_full_src_register *reg)
{
assert((reg->Register.SwizzleX == PIPE_SWIZZLE_X &&
reg->Register.SwizzleY == PIPE_SWIZZLE_Y) ||
(reg->Register.SwizzleX == PIPE_SWIZZLE_Z &&
reg->Register.SwizzleY == PIPE_SWIZZLE_W));
struct tgsi_full_src_register src;
assert((reg->Register.SwizzleZ == PIPE_SWIZZLE_X &&
reg->Register.SwizzleW == PIPE_SWIZZLE_Y) ||
(reg->Register.SwizzleZ == PIPE_SWIZZLE_Z &&
reg->Register.SwizzleW == PIPE_SWIZZLE_W));
if (((reg->Register.SwizzleX == PIPE_SWIZZLE_X &&
reg->Register.SwizzleY == PIPE_SWIZZLE_Y) ||
(reg->Register.SwizzleX == PIPE_SWIZZLE_Z &&
reg->Register.SwizzleY == PIPE_SWIZZLE_W)) &&
((reg->Register.SwizzleZ == PIPE_SWIZZLE_X &&
reg->Register.SwizzleW == PIPE_SWIZZLE_Y) ||
(reg->Register.SwizzleZ == PIPE_SWIZZLE_Z &&
reg->Register.SwizzleW == PIPE_SWIZZLE_W))) {
src = *reg;
} else {
/* move the src to a temporary to fix the swizzle */
unsigned tmp = get_temp_index(emit);
struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &tmp_dst, reg);
src = tmp_src;
/* The temporary index will be released in the caller */
}
return src;
}
/**
* Check that the writemask for a double-precision instruction is valid.
*/
@ -9312,14 +9339,16 @@ emit_dabs(struct svga_shader_emitter_v10 *emit,
const struct tgsi_full_instruction *inst)
{
assert(emit->version >= 50);
check_double_src_swizzle(&inst->Src[0]);
struct tgsi_full_src_register src = check_double_src(emit, &inst->Src[0]);
check_double_dst_writemask(inst);
struct tgsi_full_src_register abs_src = absolute_src(&inst->Src[0]);
struct tgsi_full_src_register abs_src = absolute_src(&src);
/* DMOV dst, |src| */
emit_instruction_op1(emit, VGPU10_OPCODE_DMOV, &inst->Dst[0], &abs_src);
free_temp_indexes(emit);
return TRUE;
}
@ -9332,14 +9361,15 @@ emit_dneg(struct svga_shader_emitter_v10 *emit,
const struct tgsi_full_instruction *inst)
{
assert(emit->version >= 50);
check_double_src_swizzle(&inst->Src[0]);
struct tgsi_full_src_register src = check_double_src(emit, &inst->Src[0]);
check_double_dst_writemask(inst);
struct tgsi_full_src_register neg_src = negate_src(&inst->Src[0]);
struct tgsi_full_src_register neg_src = negate_src(&src);
/* DMOV dst, -src */
emit_instruction_op1(emit, VGPU10_OPCODE_DMOV, &inst->Dst[0], &neg_src);
free_temp_indexes(emit);
return TRUE;
}
@ -9352,9 +9382,9 @@ emit_dmad(struct svga_shader_emitter_v10 *emit,
const struct tgsi_full_instruction *inst)
{
assert(emit->version >= 50);
check_double_src_swizzle(&inst->Src[0]);
check_double_src_swizzle(&inst->Src[1]);
check_double_src_swizzle(&inst->Src[2]);
struct tgsi_full_src_register src0 = check_double_src(emit, &inst->Src[0]);
struct tgsi_full_src_register src1 = check_double_src(emit, &inst->Src[1]);
struct tgsi_full_src_register src2 = check_double_src(emit, &inst->Src[2]);
check_double_dst_writemask(inst);
unsigned tmp = get_temp_index(emit);
@ -9363,12 +9393,12 @@ emit_dmad(struct svga_shader_emitter_v10 *emit,
/* DMUL tmp, src[0], src[1] */
emit_instruction_opn(emit, VGPU10_OPCODE_DMUL,
&tmp_dst, &inst->Src[0], &inst->Src[1], NULL,
&tmp_dst, &src0, &src1, NULL,
FALSE, inst->Instruction.Precise);
/* DADD dst, tmp, src[2] */
emit_instruction_opn(emit, VGPU10_OPCODE_DADD,
&inst->Dst[0], &tmp_src, &inst->Src[2], NULL,
&inst->Dst[0], &tmp_src, &src2, NULL,
inst->Instruction.Saturate, inst->Instruction.Precise);
free_temp_indexes(emit);
@ -9387,20 +9417,20 @@ emit_drsq(struct svga_shader_emitter_v10 *emit,
assert(emit->version >= 50);
VGPU10OpcodeToken0 token0;
struct tgsi_full_src_register dsrc = check_double_src(emit, src);
begin_emit_instruction(emit);
token0.value = 0;
token0.opcodeType = VGPU10_OPCODE_VMWARE;
token0.vmwareOpcodeType = VGPU10_VMWARE_OPCODE_DRSQ;
emit_dword(emit, token0.value);
emit_dst_register(emit, dst);
check_double_src_swizzle(src);
emit_src_register(emit, src);
emit_src_register(emit, &dsrc);
end_emit_instruction(emit);
free_temp_indexes(emit);
return TRUE;
}
@ -9416,7 +9446,7 @@ emit_dsqrt(struct svga_shader_emitter_v10 *emit,
{
assert(emit->version >= 50);
check_double_src_swizzle(&inst->Src[0]);
struct tgsi_full_src_register src = check_double_src(emit, &inst->Src[0]);
/* temporary register to hold the source */
unsigned tmp = get_temp_index(emit);
@ -9451,9 +9481,9 @@ emit_dsqrt(struct svga_shader_emitter_v10 *emit,
make_immediate_reg_double(emit, 1.0);
emit_instruction_op2(emit, VGPU10_OPCODE_DEQ, &tmp_cond_dst_xy,
&zero, &inst->Src[0]);
&zero, &src);
emit_instruction_op3(emit, VGPU10_OPCODE_DMOVC, &tmp_dst,
&tmp_cond_src_xy, &one, &inst->Src[0]);
&tmp_cond_src_xy, &one, &src);
struct tgsi_full_dst_register tmp_rsq_dst = make_dst_temp_reg(tmp);
struct tgsi_full_src_register tmp_rsq_src = make_src_temp_reg(tmp);
@ -9463,7 +9493,7 @@ emit_dsqrt(struct svga_shader_emitter_v10 *emit,
/* DMUL dst, tmp_rsq, src[0] */
emit_instruction_op2(emit, VGPU10_OPCODE_DMUL, &inst->Dst[0],
&tmp_rsq_src, &inst->Src[0]);
&tmp_rsq_src, &src);
free_temp_indexes(emit);
@ -9485,9 +9515,7 @@ emit_dtrunc(struct svga_shader_emitter_v10 *emit,
{
assert(emit->version >= 50);
check_double_src_swizzle(&inst->Src[0]);
struct tgsi_full_src_register src = check_double_src(emit, &inst->Src[0]);
/* frac = DFRAC(src) */
unsigned frac_index = get_temp_index(emit);
@ -9501,7 +9529,7 @@ emit_dtrunc(struct svga_shader_emitter_v10 *emit,
token0.vmwareOpcodeType = VGPU10_VMWARE_OPCODE_DFRC;
emit_dword(emit, token0.value);
emit_dst_register(emit, &frac_dst);
emit_src_register(emit, &inst->Src[0]);
emit_src_register(emit, &src);
end_emit_instruction(emit);
/* tmp = src - frac */
@ -9510,7 +9538,7 @@ emit_dtrunc(struct svga_shader_emitter_v10 *emit,
struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp_index);
struct tgsi_full_src_register negate_frac_src = negate_src(&frac_src);
emit_instruction_opn(emit, VGPU10_OPCODE_DADD,
&tmp_dst, &inst->Src[0], &negate_frac_src, NULL,
&tmp_dst, &src, &negate_frac_src, NULL,
inst->Instruction.Saturate, inst->Instruction.Precise);
/* cond = frac==0 */
@ -9548,7 +9576,7 @@ emit_dtrunc(struct svga_shader_emitter_v10 *emit,
/* cond = src>=0 */
emit_instruction_opn(emit, VGPU10_OPCODE_DGE,
&cond_dst, &inst->Src[0], &zero, NULL,
&cond_dst, &src, &zero, NULL,
inst->Instruction.Saturate, inst->Instruction.Precise);
/* dst = cond ? tmp : tmp2 */
@ -9610,6 +9638,8 @@ emit_simple(struct svga_shader_emitter_v10 *emit,
const bool dbl_src = opcode_has_dbl_src(inst->Instruction.Opcode);
unsigned i;
struct tgsi_full_src_register src[3];
if (inst->Instruction.Opcode == TGSI_OPCODE_BGNLOOP) {
emit->current_loop_depth++;
}
@ -9617,6 +9647,13 @@ emit_simple(struct svga_shader_emitter_v10 *emit,
emit->current_loop_depth--;
}
for (i = 0; i < op->num_src; i++) {
if (dbl_src)
src[i] = check_double_src(emit, &inst->Src[i]);
else
src[i] = inst->Src[i];
}
begin_emit_instruction(emit);
emit_opcode_precise(emit, translate_opcode(inst->Instruction.Opcode),
inst->Instruction.Saturate,
@ -9628,13 +9665,11 @@ emit_simple(struct svga_shader_emitter_v10 *emit,
emit_dst_register(emit, &inst->Dst[i]);
}
for (i = 0; i < op->num_src; i++) {
if (dbl_src) {
check_double_src_swizzle(&inst->Src[i]);
}
emit_src_register(emit, &inst->Src[i]);
emit_src_register(emit, &src[i]);
}
end_emit_instruction(emit);
free_temp_indexes(emit);
return TRUE;
}
@ -9925,8 +9960,15 @@ emit_vmware(struct svga_shader_emitter_v10 *emit,
const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
const bool dbl_dst = opcode_has_dbl_dst(inst->Instruction.Opcode);
const bool dbl_src = opcode_has_dbl_src(inst->Instruction.Opcode);
unsigned i;
struct tgsi_full_src_register src[3];
for (i = 0; i < op->num_src; i++) {
if (dbl_src)
src[i] = check_double_src(emit, &inst->Src[i]);
else
src[i] = inst->Src[i];
}
begin_emit_instruction(emit);
@ -9951,13 +9993,11 @@ emit_vmware(struct svga_shader_emitter_v10 *emit,
}
for (i = 0; i < op->num_src; i++) {
if (dbl_src) {
check_double_src_swizzle(&inst->Src[i]);
}
emit_src_register(emit, &inst->Src[i]);
emit_src_register(emit, &src[i]);
}
end_emit_instruction(emit);
free_temp_indexes(emit);
return TRUE;
}