ir3: Implement source restrictions for shared ALU

cat1-cat4 instructions executed on the shared ALU can use shared
registers in an unlimited capacity, as opposed to the vector ALU which
apparently treats shared registers and consts the same. However they
cannot use "normal" sources (which must be "uniformized" via a mov).

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22075>
This commit is contained in:
Connor Abbott 2023-02-16 18:38:13 +01:00 committed by Marge Bot
parent 876c5396a7
commit 06cf178ede
2 changed files with 66 additions and 10 deletions

View File

@ -1066,9 +1066,6 @@ ir3_valid_flags(struct ir3_instruction *instr, unsigned n, unsigned flags)
struct ir3_compiler *compiler = instr->block->shader->compiler;
unsigned valid_flags;
if ((flags & IR3_REG_SHARED) && opc_cat(instr->opc) > 3)
return false;
flags = cp_flags(flags);
/* If destination is indirect, then source cannot be.. at least
@ -1137,9 +1134,30 @@ ir3_valid_flags(struct ir3_instruction *instr, unsigned n, unsigned flags)
else
return flags == 0;
break;
default:
default: {
valid_flags =
IR3_REG_IMMED | IR3_REG_CONST | IR3_REG_RELATIV | IR3_REG_SHARED;
/* floating-point conversions when moving from non-shared to shared
* seem not to work. We only use floating-point types in ir3 for
* conversions, so don't bother specially handling the case where the
* types are equal.
*/
if ((instr->dsts[0]->flags & IR3_REG_SHARED) &&
!(flags & (IR3_REG_SHARED | IR3_REG_IMMED | IR3_REG_CONST)) &&
(full_type(instr->cat1.src_type) == TYPE_F32 ||
full_type(instr->cat1.dst_type) == TYPE_F32))
return false;
/* Conversions seem not to work in shared->shared copies before scalar
* ALU is supported.
*/
if (!compiler->has_scalar_alu &&
(flags & IR3_REG_SHARED) &&
(instr->dsts[0]->flags & IR3_REG_SHARED) &&
instr->cat1.src_type != instr->cat1.dst_type)
return false;
}
}
if (flags & ~valid_flags)
return false;
@ -1156,6 +1174,12 @@ ir3_valid_flags(struct ir3_instruction *instr, unsigned n, unsigned flags)
n == 1 && flags == IR3_REG_IMMED)
return true;
/* cat2/cat3 scalar ALU instructions must not have regular sources. */
if (instr->dsts[0]->flags & IR3_REG_SHARED) {
if (!(flags & (IR3_REG_SHARED | IR3_REG_IMMED | IR3_REG_CONST)))
return false;
}
if (flags & (IR3_REG_CONST | IR3_REG_IMMED | IR3_REG_SHARED)) {
unsigned m = n ^ 1;
/* cannot deal w/ const or shared in both srcs:
@ -1163,9 +1187,14 @@ ir3_valid_flags(struct ir3_instruction *instr, unsigned n, unsigned flags)
*/
if (m < instr->srcs_count) {
struct ir3_register *reg = instr->srcs[m];
if ((flags & (IR3_REG_CONST | IR3_REG_SHARED)) &&
(reg->flags & (IR3_REG_CONST | IR3_REG_SHARED)))
return false;
if (instr->dsts[0]->flags & IR3_REG_SHARED) {
if ((flags & IR3_REG_CONST) && (reg->flags & IR3_REG_CONST))
return false;
} else {
if ((flags & (IR3_REG_CONST | IR3_REG_SHARED)) &&
(reg->flags & (IR3_REG_CONST | IR3_REG_SHARED)))
return false;
}
if ((flags & IR3_REG_IMMED) && reg->flags & (IR3_REG_IMMED))
return false;
}
@ -1204,14 +1233,23 @@ ir3_valid_flags(struct ir3_instruction *instr, unsigned n, unsigned flags)
if (flags & ~valid_flags)
return false;
if (flags & (IR3_REG_CONST | IR3_REG_SHARED | IR3_REG_RELATIV)) {
if (flags & (IR3_REG_CONST | IR3_REG_RELATIV) ||
(!(instr->dsts[0]->flags & IR3_REG_SHARED) &&
(flags & IR3_REG_SHARED))) {
/* cannot deal w/ const/shared/relativ in 2nd src: */
if (n == 1)
return false;
}
if (instr->dsts[0]->flags & IR3_REG_SHARED) {
if (!(flags & (IR3_REG_SHARED | IR3_REG_IMMED | IR3_REG_CONST)))
return false;
}
break;
case 4:
if ((instr->dsts[0]->flags & IR3_REG_SHARED) != (flags & IR3_REG_SHARED))
return false;
/* seems like blob compiler avoids const as src.. */
/* TODO double check if this is still the case on a4xx */
if (flags & (IR3_REG_CONST | IR3_REG_IMMED))
@ -1226,6 +1264,10 @@ ir3_valid_flags(struct ir3_instruction *instr, unsigned n, unsigned flags)
break;
case 6:
valid_flags = IR3_REG_IMMED;
if (instr->opc == OPC_STC && n == 1)
valid_flags |= IR3_REG_SHARED;
if (flags & ~valid_flags)
return false;

View File

@ -50,6 +50,19 @@ clone_with_predicate_dst(struct opt_predicates_ctx *ctx,
return clone;
}
static bool
is_shared_or_const(struct ir3_register *reg)
{
return reg->flags & (IR3_REG_CONST | IR3_REG_SHARED);
}
static bool
cat2_needs_scalar_alu(struct ir3_instruction *instr)
{
return is_shared_or_const(instr->srcs[0]) &&
(instr->srcs_count == 1 || is_shared_or_const(instr->srcs[1]));
}
static bool
can_write_predicate(struct opt_predicates_ctx *ctx,
struct ir3_instruction *instr)
@ -58,13 +71,14 @@ can_write_predicate(struct opt_predicates_ctx *ctx,
case OPC_CMPS_S:
case OPC_CMPS_U:
case OPC_CMPS_F:
return true;
return !cat2_needs_scalar_alu(instr);
case OPC_AND_B:
case OPC_OR_B:
case OPC_NOT_B:
case OPC_XOR_B:
case OPC_GETBIT_B:
return ctx->ir->compiler->bitops_can_write_predicates;
return ctx->ir->compiler->bitops_can_write_predicates &&
!cat2_needs_scalar_alu(instr);
default:
return false;
}