304 lines
10 KiB
C
304 lines
10 KiB
C
/*
 * Copyright (C) 2020 Collabora Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors (Collabora):
 *      Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
 */
|
|
|
|
#include "compiler.h"
|
|
|
|
bool
|
|
bi_has_arg(const bi_instr *ins, bi_index arg)
|
|
{
|
|
if (!ins)
|
|
return false;
|
|
|
|
bi_foreach_src(ins, s) {
|
|
if (bi_is_equiv(ins->src[s], arg))
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
/* Precondition: valid 16-bit or 32-bit register format. Returns whether it is
 * 16-bit. Note auto reads to 32-bit registers even if the memory format is
 * 16-bit, so it is considered 32-bit (i.e. not 16-bit) here */
|
|
|
|
bool
|
|
bi_is_regfmt_16(enum bi_register_format fmt)
|
|
{
|
|
switch (fmt) {
|
|
case BI_REGISTER_FORMAT_F16:
|
|
case BI_REGISTER_FORMAT_S16:
|
|
case BI_REGISTER_FORMAT_U16:
|
|
return true;
|
|
case BI_REGISTER_FORMAT_F32:
|
|
case BI_REGISTER_FORMAT_S32:
|
|
case BI_REGISTER_FORMAT_U32:
|
|
case BI_REGISTER_FORMAT_AUTO:
|
|
return false;
|
|
default:
|
|
unreachable("Invalid register format");
|
|
}
|
|
}
|
|
|
|
static unsigned
|
|
bi_count_staging_registers(const bi_instr *ins)
|
|
{
|
|
enum bi_sr_count count = bi_opcode_props[ins->op].sr_count;
|
|
unsigned vecsize = ins->vecsize + 1; /* XXX: off-by-one */
|
|
|
|
switch (count) {
|
|
case BI_SR_COUNT_0 ... BI_SR_COUNT_4:
|
|
return count;
|
|
case BI_SR_COUNT_FORMAT:
|
|
return bi_is_regfmt_16(ins->register_format) ?
|
|
DIV_ROUND_UP(vecsize, 2) : vecsize;
|
|
case BI_SR_COUNT_VECSIZE:
|
|
return vecsize;
|
|
case BI_SR_COUNT_SR_COUNT:
|
|
return ins->sr_count;
|
|
}
|
|
|
|
unreachable("Invalid sr_count");
|
|
}
|
|
|
|
unsigned
|
|
bi_count_read_registers(const bi_instr *ins, unsigned s)
|
|
{
|
|
/* ATOM reads 1 but writes 2. Exception for ACMPXCHG */
|
|
if (s == 0 && ins->op == BI_OPCODE_ATOM_RETURN_I32)
|
|
return (ins->atom_opc == BI_ATOM_OPC_ACMPXCHG) ? 2 : 1;
|
|
else if (s == 0 && bi_opcode_props[ins->op].sr_read)
|
|
return bi_count_staging_registers(ins);
|
|
else if (s == 4 && ins->op == BI_OPCODE_BLEND)
|
|
return ins->sr_count_2; /* Dual source blending */
|
|
else if (s == 0 && ins->op == BI_OPCODE_SPLIT_I32)
|
|
return ins->nr_dests;
|
|
else
|
|
return 1;
|
|
}
|
|
|
|
unsigned
|
|
bi_count_write_registers(const bi_instr *ins, unsigned d)
|
|
{
|
|
if (d == 0 && bi_opcode_props[ins->op].sr_write) {
|
|
switch (ins->op) {
|
|
case BI_OPCODE_TEXC:
|
|
if (ins->sr_count_2)
|
|
return ins->sr_count;
|
|
else
|
|
return bi_is_regfmt_16(ins->register_format) ? 2 : 4;
|
|
|
|
case BI_OPCODE_TEX_SINGLE:
|
|
case BI_OPCODE_TEX_FETCH:
|
|
case BI_OPCODE_TEX_GATHER: {
|
|
unsigned chans = util_bitcount(ins->write_mask);
|
|
|
|
return bi_is_regfmt_16(ins->register_format) ?
|
|
DIV_ROUND_UP(chans, 2) : chans;
|
|
}
|
|
|
|
case BI_OPCODE_ACMPXCHG_I32:
|
|
/* Reads 2 but writes 1 */
|
|
return 1;
|
|
|
|
case BI_OPCODE_ATOM1_RETURN_I32:
|
|
/* Allow omitting the destination for plain ATOM1 */
|
|
return bi_is_null(ins->dest[0]) ? 0 : ins->sr_count;
|
|
default:
|
|
return bi_count_staging_registers(ins);
|
|
}
|
|
} else if (ins->op == BI_OPCODE_SEG_ADD_I64) {
|
|
return 2;
|
|
} else if (ins->op == BI_OPCODE_TEXC && d == 1) {
|
|
return ins->sr_count_2;
|
|
} else if (ins->op == BI_OPCODE_COLLECT_I32 && d == 0) {
|
|
return ins->nr_srcs;
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
unsigned
|
|
bi_writemask(const bi_instr *ins, unsigned d)
|
|
{
|
|
unsigned mask = BITFIELD_MASK(bi_count_write_registers(ins, d));
|
|
unsigned shift = ins->dest[d].offset;
|
|
return (mask << shift);
|
|
}
|
|
|
|
bi_clause *
|
|
bi_next_clause(bi_context *ctx, bi_block *block, bi_clause *clause)
|
|
{
|
|
if (!block && !clause)
|
|
return NULL;
|
|
|
|
/* Try the first clause in this block if we're starting from scratch */
|
|
if (!clause && !list_is_empty(&block->clauses))
|
|
return list_first_entry(&block->clauses, bi_clause, link);
|
|
|
|
/* Try the next clause in this block */
|
|
if (clause && clause->link.next != &block->clauses)
|
|
return list_first_entry(&(clause->link), bi_clause, link);
|
|
|
|
/* Try the next block, or the one after that if it's empty, etc .*/
|
|
bi_block *next_block = bi_next_block(block);
|
|
|
|
bi_foreach_block_from(ctx, next_block, block) {
|
|
if (!list_is_empty(&block->clauses))
|
|
return list_first_entry(&block->clauses, bi_clause, link);
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
/* Does an instruction have a side effect not captured by its register
 * destination? Applies to certain message-passing instructions, +DISCARD, and
 * branching only. Used in dead code elimination. Branches are characterized by
 * `last`, which applies to them and to some atomics, +BARRIER, and +BLEND;
 * treating all of those as having side effects implies no loss of generality */
|
|
|
|
bool
|
|
bi_side_effects(const bi_instr *I)
|
|
{
|
|
if (bi_opcode_props[I->op].last)
|
|
return true;
|
|
|
|
switch (I->op) {
|
|
case BI_OPCODE_DISCARD_F32:
|
|
case BI_OPCODE_DISCARD_B32:
|
|
return true;
|
|
default:
|
|
break;
|
|
}
|
|
|
|
switch (bi_opcode_props[I->op].message) {
|
|
case BIFROST_MESSAGE_NONE:
|
|
case BIFROST_MESSAGE_VARYING:
|
|
case BIFROST_MESSAGE_ATTRIBUTE:
|
|
case BIFROST_MESSAGE_TEX:
|
|
case BIFROST_MESSAGE_VARTEX:
|
|
case BIFROST_MESSAGE_LOAD:
|
|
case BIFROST_MESSAGE_64BIT:
|
|
return false;
|
|
|
|
case BIFROST_MESSAGE_STORE:
|
|
case BIFROST_MESSAGE_ATOMIC:
|
|
case BIFROST_MESSAGE_BARRIER:
|
|
case BIFROST_MESSAGE_BLEND:
|
|
case BIFROST_MESSAGE_Z_STENCIL:
|
|
case BIFROST_MESSAGE_ATEST:
|
|
case BIFROST_MESSAGE_JOB:
|
|
return true;
|
|
|
|
case BIFROST_MESSAGE_TILE:
|
|
return (I->op != BI_OPCODE_LD_TILE);
|
|
}
|
|
|
|
unreachable("Invalid message type");
|
|
}
|
|
|
|
/* Branch reconvergence is required when the execution mask may change
 * between adjacent instructions (clauses). This occurs for conditional
 * branches and for the last instruction (clause) in a block whose
 * fallthrough successor has multiple predecessors.
 */
|
|
|
|
bool
|
|
bi_reconverge_branches(bi_block *block)
|
|
{
|
|
/* Last block of a program */
|
|
if (!block->successors[0]) {
|
|
assert(!block->successors[1]);
|
|
return true;
|
|
}
|
|
|
|
/* Multiple successors? We're branching */
|
|
if (block->successors[1])
|
|
return true;
|
|
|
|
/* Must have at least one successor */
|
|
struct bi_block *succ = block->successors[0];
|
|
|
|
/* Reconverge if the successor has multiple predecessors */
|
|
return bi_num_predecessors(succ) > 1;
|
|
}
|
|
|
|
/*
 * When MUX.i32 or MUX.v2i16 is used to multiplex entire sources, they can be
 * replaced by CSEL as follows:
 *
 *      MUX.neg(x, y, b) -> CSEL.s.lt(b, 0, x, y)
 *      MUX.int_zero(x, y, b) -> CSEL.i.eq(b, 0, x, y)
 *      MUX.fp_zero(x, y, b) -> CSEL.f.eq(b, 0, x, y)
 *
 * MUX.bit cannot be transformed like this.
 *
 * Note that MUX.v2i16 has partial support for swizzles, which CSEL.v2i16 lacks.
 * So we must check the swizzles too.
 */
|
|
bool
|
|
bi_can_replace_with_csel(bi_instr *I)
|
|
{
|
|
return ((I->op == BI_OPCODE_MUX_I32) || (I->op == BI_OPCODE_MUX_V2I16)) &&
|
|
(I->mux != BI_MUX_BIT) &&
|
|
(I->src[0].swizzle == BI_SWIZZLE_H01) &&
|
|
(I->src[1].swizzle == BI_SWIZZLE_H01) &&
|
|
(I->src[2].swizzle == BI_SWIZZLE_H01);
|
|
}
|
|
|
|
static enum bi_opcode
|
|
bi_csel_for_mux(bool must_sign, bool b32, enum bi_mux mux)
|
|
{
|
|
switch (mux) {
|
|
case BI_MUX_INT_ZERO:
|
|
if (must_sign)
|
|
return b32 ? BI_OPCODE_CSEL_U32 : BI_OPCODE_CSEL_V2U16;
|
|
else
|
|
return b32 ? BI_OPCODE_CSEL_I32 : BI_OPCODE_CSEL_V2I16;
|
|
case BI_MUX_NEG:
|
|
return b32 ? BI_OPCODE_CSEL_S32 : BI_OPCODE_CSEL_V2S16;
|
|
case BI_MUX_FP_ZERO:
|
|
return b32 ? BI_OPCODE_CSEL_F32 : BI_OPCODE_CSEL_V2F16;
|
|
default:
|
|
unreachable("No CSEL for MUX.bit");
|
|
}
|
|
}
|
|
|
|
void
|
|
bi_replace_mux_with_csel(bi_instr *I, bool must_sign)
|
|
{
|
|
assert(I->op == BI_OPCODE_MUX_I32 || I->op == BI_OPCODE_MUX_V2I16);
|
|
I->op = bi_csel_for_mux(must_sign, I->op == BI_OPCODE_MUX_I32, I->mux);
|
|
I->cmpf = (I->mux == BI_MUX_NEG) ? BI_CMPF_LT : BI_CMPF_EQ;
|
|
I->mux = 0;
|
|
|
|
bi_index vTrue = I->src[0], vFalse = I->src[1], cond = I->src[2];
|
|
|
|
I->src[0] = cond;
|
|
I->src[1] = bi_zero();
|
|
I->src[2] = vTrue;
|
|
I->src[3] = vFalse;
|
|
}
|