pan/bi: Remove old IR packs
Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8135>
This commit is contained in:
parent
8b0d0a931b
commit
a1e150fc4d
|
@ -23,13 +23,6 @@
|
|||
|
||||
#include "compiler.h"
|
||||
#include "bi_print.h"
|
||||
#include "bi_generated_pack.h"
|
||||
|
||||
/* Bit-copies a packed hardware struct into a zero-initialised 64-bit
 * temporary and returns that value from the enclosing function.
 *
 * The struct must fit in 64 bits; this is asserted so an oversized struct
 * cannot overflow the temporary. The body is wrapped in do/while(0) so the
 * macro behaves as a single statement, and the argument is parenthesised so
 * any lvalue expression may be passed. */
#define RETURN_PACKED(str) do { \
        uint64_t temp = 0; \
        assert(sizeof(str) <= sizeof(temp)); \
        memcpy(&temp, &(str), sizeof(str)); \
        return temp; \
} while (0)
|
||||
|
||||
/* This file contains the final passes of the compiler. Running after
|
||||
* scheduling and RA, the IR is now finalized, so we need to emit it to actual
|
||||
|
@ -388,543 +381,6 @@ bi_pack_registers(bi_registers regs)
|
|||
return packed;
|
||||
}
|
||||
|
||||
static unsigned
|
||||
bi_pack_fma_special(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
|
||||
{
|
||||
switch (ins->op.special) {
|
||||
case BI_SPECIAL_CUBEFACE1:
|
||||
return pan_pack_fma_cubeface1(clause, ins, regs);
|
||||
default:
|
||||
unreachable("Unknown special op");
|
||||
}
|
||||
}
|
||||
|
||||
#define BI_PACK_SHIFT(name) \
|
||||
static unsigned \
|
||||
bi_pack_fma_ ## name(bi_clause *clause, bi_instruction *ins, bi_registers *regs) \
|
||||
{ \
|
||||
switch (nir_alu_type_get_type_size(ins->dest_type)) { \
|
||||
case 32: \
|
||||
return pan_pack_fma_ ## name ## _i32(clause, ins, regs); \
|
||||
case 16: \
|
||||
return pan_pack_fma_ ## name ## _v2i16(clause, ins, regs); \
|
||||
case 8: \
|
||||
return pan_pack_fma_ ## name ## _v4i8(clause, ins, regs); \
|
||||
default: \
|
||||
unreachable("Invalid dest size"); \
|
||||
} \
|
||||
}
|
||||
|
||||
BI_PACK_SHIFT(rshift_and)
|
||||
BI_PACK_SHIFT(lshift_and)
|
||||
BI_PACK_SHIFT(rshift_or)
|
||||
BI_PACK_SHIFT(lshift_or)
|
||||
BI_PACK_SHIFT(rshift_xor)
|
||||
BI_PACK_SHIFT(lshift_xor)
|
||||
BI_PACK_SHIFT(arshift)
|
||||
|
||||
static unsigned
|
||||
bi_pack_fma_bitwise(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
|
||||
{
|
||||
switch (ins->op.bitwise) {
|
||||
case BI_BITWISE_AND:
|
||||
return ins->bitwise.rshift ?
|
||||
bi_pack_fma_rshift_and(clause, ins, regs) :
|
||||
bi_pack_fma_lshift_and(clause, ins, regs);
|
||||
case BI_BITWISE_OR:
|
||||
return ins->bitwise.rshift ?
|
||||
bi_pack_fma_rshift_or(clause, ins, regs) :
|
||||
bi_pack_fma_lshift_or(clause, ins, regs);
|
||||
case BI_BITWISE_XOR:
|
||||
return ins->bitwise.rshift ?
|
||||
bi_pack_fma_rshift_xor(clause, ins, regs) :
|
||||
bi_pack_fma_lshift_xor(clause, ins, regs);
|
||||
case BI_BITWISE_ARSHIFT:
|
||||
assert(ins->bitwise.rshift);
|
||||
return bi_pack_fma_arshift(clause, ins, regs);
|
||||
default:
|
||||
unreachable("Invalid bitwise op");
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned
|
||||
pan_pack_fma(bi_clause *clause, bi_bundle bundle, bi_registers *regs)
|
||||
{
|
||||
if (!bundle.fma)
|
||||
return pan_pack_fma_nop_i32(clause, NULL, regs);
|
||||
|
||||
bool f16 = bundle.fma->dest_type == nir_type_float16;
|
||||
bool f32 = bundle.fma->dest_type == nir_type_float32;
|
||||
bool u32 = bundle.fma->dest_type == nir_type_uint32 ||
|
||||
bundle.fma->dest_type == nir_type_bool32;
|
||||
bool u16 = bundle.fma->dest_type == nir_type_uint16;
|
||||
bool s32 = bundle.fma->dest_type == nir_type_int32;
|
||||
bool s16 = bundle.fma->dest_type == nir_type_int16;
|
||||
|
||||
bool src0_f16 = bundle.fma->src_types[0] == nir_type_float16;
|
||||
bool src0_f32 = bundle.fma->src_types[0] == nir_type_float32;
|
||||
bool src0_u16 = bundle.fma->src_types[0] == nir_type_uint16;
|
||||
bool src0_s16 = bundle.fma->src_types[0] == nir_type_int16;
|
||||
bool src0_s8 = bundle.fma->src_types[0] == nir_type_int8;
|
||||
bool src0_u8 = bundle.fma->src_types[0] == nir_type_uint8;
|
||||
|
||||
enum bi_cond cond = bundle.fma->cond;
|
||||
bool typeless_cond = (cond == BI_COND_EQ) || (cond == BI_COND_NE);
|
||||
|
||||
switch (bundle.fma->type) {
|
||||
case BI_ADD:
|
||||
if (bundle.fma->dest_type == nir_type_float32)
|
||||
return pan_pack_fma_fadd_f32(clause, bundle.fma, regs);
|
||||
else if (bundle.fma->dest_type == nir_type_float16)
|
||||
return pan_pack_fma_fadd_v2f16(clause, bundle.fma, regs);
|
||||
|
||||
unreachable("TODO");
|
||||
case BI_CMP:
|
||||
assert (src0_f16 || src0_f32);
|
||||
|
||||
if (src0_f32)
|
||||
return pan_pack_fma_fcmp_f32(clause, bundle.fma, regs);
|
||||
else
|
||||
return pan_pack_fma_fcmp_v2f16(clause, bundle.fma, regs);
|
||||
case BI_BITWISE:
|
||||
return bi_pack_fma_bitwise(clause, bundle.fma, regs);
|
||||
case BI_CONVERT:
|
||||
if (src0_s8) {
|
||||
assert(s32);
|
||||
return pan_pack_fma_s8_to_s32(clause, bundle.fma, regs);
|
||||
} else if (src0_u8) {
|
||||
assert(u32);
|
||||
return pan_pack_fma_u8_to_u32(clause, bundle.fma, regs);
|
||||
} else if (src0_s16) {
|
||||
assert(s32);
|
||||
return pan_pack_fma_s16_to_s32(clause, bundle.fma, regs);
|
||||
} else if (src0_u16) {
|
||||
assert(u32);
|
||||
return pan_pack_fma_u16_to_u32(clause, bundle.fma, regs);
|
||||
} else if (src0_f16) {
|
||||
assert(f32);
|
||||
return pan_pack_fma_f16_to_f32(clause, bundle.fma, regs);
|
||||
} else if (src0_f32) {
|
||||
assert(f16);
|
||||
return pan_pack_fma_v2f32_to_v2f16(clause, bundle.fma, regs);
|
||||
}
|
||||
|
||||
unreachable("Invalid FMA convert");
|
||||
case BI_CSEL:
|
||||
if (f32)
|
||||
return pan_pack_fma_csel_f32(clause, bundle.fma, regs);
|
||||
else if (f16)
|
||||
return pan_pack_fma_csel_v2f16(clause, bundle.fma, regs);
|
||||
else if ((u32 || s32) && typeless_cond)
|
||||
return pan_pack_fma_csel_i32(clause, bundle.fma, regs);
|
||||
else if ((u16 || s16) && typeless_cond)
|
||||
return pan_pack_fma_csel_v2i16(clause, bundle.fma, regs);
|
||||
else if (u32)
|
||||
return pan_pack_fma_csel_u32(clause, bundle.fma, regs);
|
||||
else if (u16)
|
||||
return pan_pack_fma_csel_v2u16(clause, bundle.fma, regs);
|
||||
else if (s32)
|
||||
return pan_pack_fma_csel_s32(clause, bundle.fma, regs);
|
||||
else if (s16)
|
||||
return pan_pack_fma_csel_v2s16(clause, bundle.fma, regs);
|
||||
else
|
||||
unreachable("Invalid csel type");
|
||||
case BI_FMA:
|
||||
if (bundle.fma->dest_type == nir_type_float32) {
|
||||
if (bundle.fma->op.mscale)
|
||||
return pan_pack_fma_fma_rscale_f32(clause, bundle.fma, regs);
|
||||
else
|
||||
return pan_pack_fma_fma_f32(clause, bundle.fma, regs);
|
||||
} else {
|
||||
assert(bundle.fma->dest_type == nir_type_float16);
|
||||
|
||||
if (bundle.fma->op.mscale)
|
||||
return pan_pack_fma_fma_rscale_v2f16(clause, bundle.fma, regs);
|
||||
else
|
||||
return pan_pack_fma_fma_v2f16(clause, bundle.fma, regs);
|
||||
}
|
||||
case BI_FREXP:
|
||||
assert(src0_f32 || src0_f16);
|
||||
|
||||
if (src0_f32)
|
||||
return pan_pack_fma_frexpe_f32(clause, bundle.fma, regs);
|
||||
else
|
||||
return pan_pack_fma_frexpe_v2f16(clause, bundle.fma, regs);
|
||||
case BI_IMATH:
|
||||
/* XXX: Only 32-bit, with carries/borrows forced */
|
||||
assert(s32 || u32);
|
||||
|
||||
if (bundle.fma->op.imath == BI_IMATH_ADD)
|
||||
return pan_pack_fma_iaddc_i32(clause, bundle.fma, regs);
|
||||
else
|
||||
return pan_pack_fma_isubb_i32(clause, bundle.fma, regs);
|
||||
case BI_MOV:
|
||||
return pan_pack_fma_mov_i32(clause, bundle.fma, regs);
|
||||
case BI_SELECT:
|
||||
if (nir_alu_type_get_type_size(bundle.fma->src_types[0]) == 16) {
|
||||
return pan_pack_fma_mkvec_v2i16(clause, bundle.fma, regs);
|
||||
} else {
|
||||
assert(nir_alu_type_get_type_size(bundle.fma->src_types[0]) == 8);
|
||||
return pan_pack_fma_mkvec_v4i8(clause, bundle.fma, regs);
|
||||
}
|
||||
case BI_ROUND:
|
||||
assert(f16 || f32);
|
||||
|
||||
if (f16)
|
||||
return pan_pack_fma_fround_v2f16(clause, bundle.fma, regs);
|
||||
else
|
||||
return pan_pack_fma_fround_f32(clause, bundle.fma, regs);
|
||||
case BI_REDUCE_FMA:
|
||||
assert(src0_f32 && f32);
|
||||
return pan_pack_fma_fadd_lscale_f32(clause, bundle.fma, regs);
|
||||
case BI_IMUL:
|
||||
return pan_pack_fma_imul_i32(clause, bundle.fma, regs);
|
||||
case BI_SPECIAL_FMA:
|
||||
return bi_pack_fma_special(clause, bundle.fma, regs);
|
||||
default:
|
||||
unreachable("Cannot encode class as FMA");
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned
|
||||
bi_pack_add_branch_cond(bi_instruction *ins, bi_registers *regs)
|
||||
{
|
||||
assert(ins->cond == BI_COND_EQ);
|
||||
assert(ins->src[1] == BIR_INDEX_ZERO);
|
||||
|
||||
unsigned zero_ctrl = 0;
|
||||
unsigned size = nir_alu_type_get_type_size(ins->src_types[0]);
|
||||
|
||||
if (size == 16) {
|
||||
/* See BR_SIZE_ZERO swizzle disassembly */
|
||||
zero_ctrl = ins->swizzle[0][0] ? 1 : 2;
|
||||
} else {
|
||||
assert(size == 32);
|
||||
}
|
||||
|
||||
/* EQ swap to NE */
|
||||
bool slot_swapped = false;
|
||||
|
||||
struct bifrost_branch pack = {
|
||||
.src0 = bi_get_src(ins, regs, 0),
|
||||
.src1 = (zero_ctrl << 1) | !slot_swapped,
|
||||
.cond = BR_COND_EQ,
|
||||
.size = BR_SIZE_ZERO,
|
||||
.op = BIFROST_ADD_OP_BRANCH
|
||||
};
|
||||
|
||||
if (ins->branch_target) {
|
||||
/* We assigned the constant slot to fetch the branch offset so
|
||||
* we can just passthrough here. We put in the HI slot to match
|
||||
* the blob since that's where the magic flags end up
|
||||
*/
|
||||
assert(!ins->src[2]);
|
||||
pack.src2 = BIFROST_SRC_FAU_HI;
|
||||
} else {
|
||||
pack.src2 = bi_get_src(ins, regs, 2);
|
||||
}
|
||||
|
||||
RETURN_PACKED(pack);
|
||||
}
|
||||
|
||||
static unsigned
|
||||
bi_pack_add_branch_uncond(bi_instruction *ins, bi_registers *regs)
|
||||
{
|
||||
struct bifrost_branch pack = {
|
||||
/* It's unclear what these bits actually mean */
|
||||
.src0 = BIFROST_SRC_FAU_LO,
|
||||
.src1 = BIFROST_SRC_PASS_FMA,
|
||||
|
||||
/* All ones in fact */
|
||||
.cond = (BR_ALWAYS & 0x7),
|
||||
.size = (BR_ALWAYS >> 3),
|
||||
.op = BIFROST_ADD_OP_BRANCH
|
||||
};
|
||||
|
||||
if (ins->branch_target) {
|
||||
/* Offset is passed as a PC-relative offset through an
|
||||
* embedded constant.
|
||||
*/
|
||||
assert(!ins->src[2]);
|
||||
pack.src2 = BIFROST_SRC_FAU_HI;
|
||||
} else {
|
||||
pack.src2 = bi_get_src(ins, regs, 2);
|
||||
}
|
||||
|
||||
RETURN_PACKED(pack);
|
||||
}
|
||||
|
||||
static unsigned
|
||||
bi_pack_add_branch(bi_instruction *ins, bi_registers *regs)
|
||||
{
|
||||
if (ins->cond == BI_COND_ALWAYS)
|
||||
return bi_pack_add_branch_uncond(ins, regs);
|
||||
else
|
||||
return bi_pack_add_branch_cond(ins, regs);
|
||||
}
|
||||
|
||||
static unsigned
|
||||
bi_pack_add_special(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
|
||||
{
|
||||
bool f16 = ins->dest_type == nir_type_float16;
|
||||
|
||||
switch (ins->op.special) {
|
||||
case BI_SPECIAL_FRCP:
|
||||
return f16 ? pan_pack_add_frcp_f16(clause, ins, regs) :
|
||||
pan_pack_add_frcp_f32(clause, ins, regs);
|
||||
case BI_SPECIAL_FRSQ:
|
||||
return f16 ? pan_pack_add_frsq_f16(clause, ins, regs) :
|
||||
pan_pack_add_frsq_f32(clause, ins, regs);
|
||||
case BI_SPECIAL_EXP2_LOW:
|
||||
assert(!f16);
|
||||
return pan_pack_add_fexp_f32(clause, ins, regs);
|
||||
case BI_SPECIAL_IABS:
|
||||
assert(ins->src_types[0] == nir_type_int32);
|
||||
return pan_pack_add_iabs_s32(clause, ins, regs);
|
||||
case BI_SPECIAL_CUBEFACE2:
|
||||
return pan_pack_add_cubeface2(clause, ins, regs);
|
||||
case BI_SPECIAL_CUBE_SSEL:
|
||||
return pan_pack_add_cube_ssel(clause, ins, regs);
|
||||
case BI_SPECIAL_CUBE_TSEL:
|
||||
return pan_pack_add_cube_tsel(clause, ins, regs);
|
||||
case BI_SPECIAL_CLPER_V6:
|
||||
return pan_pack_add_clper_v6_i32(clause, ins, regs);
|
||||
case BI_SPECIAL_CLPER_V7:
|
||||
return pan_pack_add_clper_v7_i32(clause, ins, regs);
|
||||
default:
|
||||
unreachable("Unknown special op");
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned
|
||||
bi_pack_add_ld_var(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
|
||||
{
|
||||
if (ins->load_vary.special)
|
||||
return pan_pack_add_ld_var_special(clause, ins, regs);
|
||||
|
||||
if (ins->load_vary.flat) {
|
||||
return ins->load_vary.immediate ?
|
||||
pan_pack_add_ld_var_flat_imm(clause, ins, regs) :
|
||||
pan_pack_add_ld_var_flat(clause, ins, regs);
|
||||
}
|
||||
|
||||
return ins->load_vary.immediate ?
|
||||
pan_pack_add_ld_var_imm(clause, ins, regs) :
|
||||
pan_pack_add_ld_var(clause, ins, regs);
|
||||
}
|
||||
|
||||
static unsigned
|
||||
pan_pack_add(bi_clause *clause, bi_bundle bundle, bi_registers *regs, gl_shader_stage stage)
|
||||
{
|
||||
if (!bundle.add)
|
||||
return pan_pack_add_nop_i32(clause, NULL, regs);
|
||||
|
||||
bool f16 = bundle.add->dest_type == nir_type_float16;
|
||||
bool f32 = bundle.add->dest_type == nir_type_float32;
|
||||
bool u32 = bundle.add->dest_type == nir_type_uint32 ||
|
||||
bundle.add->dest_type == nir_type_bool32;
|
||||
bool u16 = bundle.add->dest_type == nir_type_uint16;
|
||||
bool s32 = bundle.add->dest_type == nir_type_int32;
|
||||
bool s16 = bundle.add->dest_type == nir_type_int16;
|
||||
|
||||
bool src0_f16 = bundle.add->src_types[0] == nir_type_float16;
|
||||
bool src0_f32 = bundle.add->src_types[0] == nir_type_float32;
|
||||
bool src0_u32 = bundle.add->src_types[0] == nir_type_uint32;
|
||||
bool src0_u16 = bundle.add->src_types[0] == nir_type_uint16;
|
||||
bool src0_u8 = bundle.add->src_types[0] == nir_type_uint8;
|
||||
bool src0_s32 = bundle.add->src_types[0] == nir_type_int32;
|
||||
bool src0_s16 = bundle.add->src_types[0] == nir_type_int16;
|
||||
bool src0_s8 = bundle.add->src_types[0] == nir_type_int8;
|
||||
|
||||
unsigned sz = nir_alu_type_get_type_size(bundle.add->dest_type);
|
||||
enum bi_cond cond = bundle.add->cond;
|
||||
bool typeless_cond = (cond == BI_COND_EQ) || (cond == BI_COND_NE);
|
||||
|
||||
switch (bundle.add->type) {
|
||||
case BI_ADD:
|
||||
if (bundle.add->dest_type == nir_type_float32)
|
||||
return pan_pack_add_fadd_f32(clause, bundle.add, regs);
|
||||
else if (bundle.add->dest_type == nir_type_float16)
|
||||
return pan_pack_add_fadd_v2f16(clause, bundle.add, regs);
|
||||
|
||||
unreachable("TODO");
|
||||
case BI_ATEST:
|
||||
return pan_pack_add_atest(clause, bundle.add, regs);
|
||||
case BI_BRANCH:
|
||||
return bi_pack_add_branch(bundle.add, regs);
|
||||
case BI_CMP:
|
||||
if (src0_f32)
|
||||
return pan_pack_add_fcmp_f32(clause, bundle.add, regs);
|
||||
else if (src0_f16)
|
||||
return pan_pack_add_fcmp_v2f16(clause, bundle.add, regs);
|
||||
else if ((src0_u32 || src0_s32) && typeless_cond)
|
||||
return pan_pack_add_icmp_i32(clause, bundle.add, regs);
|
||||
else if ((src0_u16 || src0_s16) && typeless_cond)
|
||||
return pan_pack_add_icmp_v2i16(clause, bundle.add, regs);
|
||||
else if ((src0_u8 || src0_s8) && typeless_cond)
|
||||
return pan_pack_add_icmp_v4i8(clause, bundle.add, regs);
|
||||
else if (src0_u32)
|
||||
return pan_pack_add_icmp_u32(clause, bundle.add, regs);
|
||||
else if (src0_u16)
|
||||
return pan_pack_add_icmp_v2u16(clause, bundle.add, regs);
|
||||
else if (src0_u8)
|
||||
return pan_pack_add_icmp_v4u8(clause, bundle.add, regs);
|
||||
else if (src0_s32)
|
||||
return pan_pack_add_icmp_s32(clause, bundle.add, regs);
|
||||
else if (src0_s16)
|
||||
return pan_pack_add_icmp_v2s16(clause, bundle.add, regs);
|
||||
else if (src0_s8)
|
||||
return pan_pack_add_icmp_v4s8(clause, bundle.add, regs);
|
||||
else
|
||||
unreachable("Invalid cmp type");
|
||||
case BI_BLEND:
|
||||
return pan_pack_add_blend(clause, bundle.add, regs);
|
||||
case BI_BITWISE:
|
||||
unreachable("Packing todo");
|
||||
case BI_CONVERT:
|
||||
if (src0_f16 && s16)
|
||||
return pan_pack_add_v2f16_to_v2s16(clause, bundle.add, regs);
|
||||
else if (src0_f16 && u16)
|
||||
return pan_pack_add_v2f16_to_v2u16(clause, bundle.add, regs);
|
||||
else if (src0_f16 && s32)
|
||||
return pan_pack_add_f16_to_s32(clause, bundle.add, regs);
|
||||
else if (src0_f16 && u32)
|
||||
return pan_pack_add_f16_to_u32(clause, bundle.add, regs);
|
||||
else if (src0_s16 && f16)
|
||||
return pan_pack_add_v2s16_to_v2f16(clause, bundle.add, regs);
|
||||
else if (src0_u16 && f16)
|
||||
return pan_pack_add_v2u16_to_v2f16(clause, bundle.add, regs);
|
||||
else if (src0_s8 && s16)
|
||||
return pan_pack_add_v2s8_to_v2s16(clause, bundle.add, regs);
|
||||
else if (src0_u8 && u16)
|
||||
return pan_pack_add_v2u8_to_v2u16(clause, bundle.add, regs);
|
||||
else if (src0_s8 && f16)
|
||||
return pan_pack_add_v2s8_to_v2f16(clause, bundle.add, regs);
|
||||
else if (src0_u8 && f16)
|
||||
return pan_pack_add_v2u8_to_v2f16(clause, bundle.add, regs);
|
||||
else if (src0_f32 && s32)
|
||||
return pan_pack_add_f32_to_s32(clause, bundle.add, regs);
|
||||
else if (src0_f32 && u32)
|
||||
return pan_pack_add_f32_to_u32(clause, bundle.add, regs);
|
||||
else if (src0_s8 && s32)
|
||||
return pan_pack_add_s8_to_s32(clause, bundle.add, regs);
|
||||
else if (src0_u8 && u32)
|
||||
return pan_pack_add_u8_to_u32(clause, bundle.add, regs);
|
||||
else if (src0_s8 && f32)
|
||||
return pan_pack_add_s8_to_f32(clause, bundle.add, regs);
|
||||
else if (src0_u8 && f32)
|
||||
return pan_pack_add_u8_to_f32(clause, bundle.add, regs);
|
||||
else if (src0_s32 && f32)
|
||||
return pan_pack_add_s32_to_f32(clause, bundle.add, regs);
|
||||
else if (src0_u32 && f32)
|
||||
return pan_pack_add_u32_to_f32(clause, bundle.add, regs);
|
||||
else if (src0_s16 && s32)
|
||||
return pan_pack_add_s16_to_s32(clause, bundle.add, regs);
|
||||
else if (src0_u16 && u32)
|
||||
return pan_pack_add_u16_to_u32(clause, bundle.add, regs);
|
||||
else if (src0_s16 && f32)
|
||||
return pan_pack_add_s16_to_f32(clause, bundle.add, regs);
|
||||
else if (src0_u16 && f32)
|
||||
return pan_pack_add_u16_to_f32(clause, bundle.add, regs);
|
||||
else if (src0_f16 && f32)
|
||||
return pan_pack_add_f16_to_f32(clause, bundle.add, regs);
|
||||
else if (src0_f32 && f16)
|
||||
return pan_pack_add_v2f32_to_v2f16(clause, bundle.add, regs);
|
||||
else
|
||||
unreachable("Invalid ADD convert");
|
||||
case BI_DISCARD:
|
||||
return pan_pack_add_discard_f32(clause, bundle.add, regs);
|
||||
case BI_FREXP:
|
||||
unreachable("Packing todo");
|
||||
case BI_IMATH:
|
||||
assert(sz == 8 || sz == 16 || sz == 32);
|
||||
|
||||
if (bundle.add->op.imath == BI_IMATH_ADD) {
|
||||
return (sz == 8) ? pan_pack_add_iadd_v4s8(clause, bundle.add, regs) :
|
||||
(sz == 16) ? pan_pack_add_iadd_v2s16(clause, bundle.add, regs) :
|
||||
pan_pack_add_iadd_s32(clause, bundle.add, regs);
|
||||
} else {
|
||||
return (sz == 8) ? pan_pack_add_isub_v4s8(clause, bundle.add, regs) :
|
||||
(sz == 16) ? pan_pack_add_isub_v2s16(clause, bundle.add, regs) :
|
||||
pan_pack_add_isub_s32(clause, bundle.add, regs);
|
||||
}
|
||||
case BI_LOAD_ATTR:
|
||||
return bundle.add->attribute.immediate ?
|
||||
pan_pack_add_ld_attr_imm(clause, bundle.add, regs) :
|
||||
pan_pack_add_ld_attr(clause, bundle.add, regs);
|
||||
case BI_LOAD:
|
||||
case BI_LOAD_UNIFORM:
|
||||
assert(u32 || s32 || f32);
|
||||
switch (bundle.add->vector_channels) {
|
||||
case 1: return pan_pack_add_load_i32(clause, bundle.add, regs);
|
||||
case 2: return pan_pack_add_load_i64(clause, bundle.add, regs);
|
||||
case 3: return pan_pack_add_load_i96(clause, bundle.add, regs);
|
||||
case 4: return pan_pack_add_load_i128(clause, bundle.add, regs);
|
||||
default: unreachable("Invalid channel count");
|
||||
}
|
||||
case BI_LOAD_VAR:
|
||||
return bi_pack_add_ld_var(clause, bundle.add, regs);
|
||||
case BI_LOAD_VAR_ADDRESS:
|
||||
return bundle.add->attribute.immediate ?
|
||||
pan_pack_add_lea_attr_imm(clause, bundle.add, regs) :
|
||||
pan_pack_add_lea_attr(clause, bundle.add, regs);
|
||||
case BI_LOAD_TILE:
|
||||
return pan_pack_add_ld_tile(clause, bundle.add, regs);
|
||||
case BI_MINMAX:
|
||||
if (bundle.add->op.minmax == BI_MINMAX_MIN) {
|
||||
if (bundle.add->dest_type == nir_type_float32)
|
||||
return pan_pack_add_fmin_f32(clause, bundle.add, regs);
|
||||
else if (bundle.add->dest_type == nir_type_float16)
|
||||
return pan_pack_add_fmin_v2f16(clause, bundle.add, regs);
|
||||
unreachable("TODO");
|
||||
} else {
|
||||
if (bundle.add->dest_type == nir_type_float32)
|
||||
return pan_pack_add_fmax_f32(clause, bundle.add, regs);
|
||||
else if (bundle.add->dest_type == nir_type_float16)
|
||||
return pan_pack_add_fmax_v2f16(clause, bundle.add, regs);
|
||||
unreachable("TODO");
|
||||
}
|
||||
case BI_MOV:
|
||||
unreachable("Packing todo");
|
||||
case BI_STORE:
|
||||
assert(src0_u32 || src0_s32 || src0_f32);
|
||||
switch (bundle.add->vector_channels) {
|
||||
case 1: return pan_pack_add_store_i32(clause, bundle.add, regs);
|
||||
case 2: return pan_pack_add_store_i64(clause, bundle.add, regs);
|
||||
case 3: return pan_pack_add_store_i96(clause, bundle.add, regs);
|
||||
case 4: return pan_pack_add_store_i128(clause, bundle.add, regs);
|
||||
default: unreachable("Invalid channel count");
|
||||
}
|
||||
case BI_STORE_VAR:
|
||||
return pan_pack_add_st_cvt(clause, bundle.add, regs);
|
||||
case BI_SPECIAL_ADD:
|
||||
return bi_pack_add_special(clause, bundle.add, regs);
|
||||
case BI_TABLE:
|
||||
assert(bundle.add->dest_type == nir_type_float32);
|
||||
return pan_pack_add_flogd_f32(clause, bundle.add, regs);
|
||||
case BI_SELECT:
|
||||
assert(nir_alu_type_get_type_size(bundle.add->src_types[0]) == 16);
|
||||
return pan_pack_add_mkvec_v2i16(clause, bundle.add, regs);
|
||||
case BI_TEXC:
|
||||
return pan_pack_add_texc(clause, bundle.add, regs);
|
||||
case BI_TEXC_DUAL:
|
||||
unreachable("Packing todo");
|
||||
case BI_TEXS:
|
||||
assert(f16 || f32);
|
||||
|
||||
if (f16)
|
||||
return pan_pack_add_texs_2d_f16(clause, bundle.add, regs);
|
||||
else
|
||||
return pan_pack_add_texs_2d_f32(clause, bundle.add, regs);
|
||||
case BI_ROUND:
|
||||
unreachable("Packing todo");
|
||||
case BI_ZS_EMIT:
|
||||
return pan_pack_add_zs_emit(clause, bundle.add, regs);
|
||||
default:
|
||||
unreachable("Cannot encode class as ADD");
|
||||
}
|
||||
}
|
||||
|
||||
struct bi_packed_bundle {
|
||||
uint64_t lo;
|
||||
uint64_t hi;
|
||||
|
|
|
@ -1,556 +0,0 @@
|
|||
#
|
||||
# Copyright (C) 2020 Collabora, Ltd.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a
|
||||
# copy of this software and associated documentation files (the "Software"),
|
||||
# to deal in the Software without restriction, including without limitation
|
||||
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
# and/or sell copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice (including the next
|
||||
# paragraph) shall be included in all copies or substantial portions of the
|
||||
# Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
# IN THE SOFTWARE.
|
||||
|
||||
import sys
|
||||
from bifrost_isa import parse_instructions, opname_to_c, COPYRIGHT
|
||||
from mako.template import Template
|
||||
|
||||
instructions = parse_instructions(sys.argv[1])
|
||||
|
||||
# Packs sources into an argument. Offset argument to work around a quirk of our
|
||||
# compiler IR when dealing with staging registers (TODO: reorder in the IR to
|
||||
# fix this)
|
||||
def pack_sources(sources, body, pack_exprs, offset):
    """Emit C that fetches and validates each source, and queue its pack term.

    sources    -- sequence of entries indexed as (shift, mask): entry[0] is
                  the left-shift applied to the packed source and entry[1] a
                  bitmask of permitted encoded values (0xFF skips the check).
    body       -- list of C source lines, appended to in place.
    pack_exprs -- list of per-state expression lists; every state receives
                  the same shifted-source term.
    offset     -- added to the source index passed to bi_get_src().
    """
    for index, src in enumerate(sources):
        body.append('unsigned src{} = bi_get_src(ins, regs, {});'.format(index, index + offset))

        # Only emit a validity assertion when the mask actually restricts values
        valid_mask = src[1]
        if valid_mask != 0xFF:
            body.append('assert((1 << src{}) & {});'.format(index, hex(valid_mask)))

        # Sources pack identically in every state
        term = '(src{} << {})'.format(index, src[0])
        for state in pack_exprs:
            state.append(term)

    # Blank line separating the source prologue from what follows
    body.append('')
|
||||
|
||||
# Gets the argument that the source modifier applies to from the name if
|
||||
# applicable, otherwise defaults to the first argument
|
||||
|
||||
def mod_arg(mod):
    """Return the source index encoded as the final character of a modifier
    name ('0'..'3'), or 0 when the name has no such suffix."""
    suffix = mod[-1]
    if suffix in "0123":
        return int(suffix)
    return 0
|
||||
|
||||
# Widen/lane/swz/swap/replicate modifiers conceptually act as a combined extend
|
||||
# + swizzle. We look at the size of the argument to determine if we apply
|
||||
# them, and look at the swizzle to pick which one.
|
||||
|
||||
def pack_widen(mod, opts, body, pack_exprs):
    """Emit an if/else-if chain that pattern matches a widen-style modifier.

    For each usable option (None and 'reserved' are skipped) a branch is
    appended to body that tests the source size and, for 'h'/'b' options,
    the swizzle lanes; on a match <mod>_temp is set to the option's index.
    A trailing unreachable() covers the no-match case. Returns the name of
    the C temporary holding the encoded value. pack_exprs is not used.
    """
    src_index = mod_arg(mod)
    size_var = '{}_sz'.format(mod)
    result_var = '{}_temp'.format(mod)

    body.append('unsigned {} = nir_alu_type_get_type_size(ins->src_types[{}]);'.format(size_var, src_index))
    body.append('unsigned {} = 0;'.format(result_var))

    branches = 0
    for value, op in enumerate(opts):
        if op is None or op == 'reserved':
            continue

        # Every branch after the first chains onto the previous one
        prefix = 'else ' if branches else ''
        branches += 1

        if op in ('none', 'w0'):
            test = '{} == 32'.format(size_var)
        elif op == 'd0':
            test = '{} == 64'.format(size_var)
        else:
            assert(op[0] in ['h', 'b'])
            bits = 16 if op[0] == 'h' else 8

            # One swizzle comparison per lane digit following the size letter
            lanes = ['(ins->swizzle[{}][{}] % 4) == {}'.format(src_index, idx, lane)
                     for idx, lane in enumerate(op[1:])]
            test = '{} == {} && {}'.format(size_var, bits, " && ".join(lanes))

        body.append('{}if ({}) {} = {};'.format(prefix, test, result_var, value))

    body.append('else unreachable("Could not pattern match widen");')

    return result_var
|
||||
|
||||
# abs/neg are stored in ins->src_{abs,neg}[src] arrays
|
||||
def pack_absneg(mod, opts, body, pack_exprs):
    """Return the C expression reading the abs/neg flag for a modifier, i.e.
    ins->src_<name>[<index>], where <name> is the modifier name with any
    trailing source digit removed. opts, body and pack_exprs are unused."""
    has_index = mod[-1] in "0123"
    field = mod[0:-1] if has_index else mod
    return 'ins->src_{}[{}]'.format(field, mod_arg(mod))
|
||||
|
||||
# ins->round is the native format (RTE/RTP/RTN/RTZ) for most ops. But there
|
||||
# are some others we might encounter that we don't support in the IR at this
|
||||
# point, and there are a few that force a subset of round modes.
|
||||
|
||||
def pack_round(mod, opts, body, pack_exprs):
    """Return the C expression encoding the rounding mode for an option list.

    Two-entry option lists force a subset of modes: a C assert restricting
    ins->round is appended to body and the result is a 0/1 selector. Lists
    starting with none/rtp/rtn/rtz use ins->round unchanged. Any other list
    trips a generator-side assertion.
    """
    if opts == ['none', 'rtz']:
        body.append('assert(ins->round == BI_ROUND_NONE || ins->round == BI_ROUND_RTZ);')
        return '(ins->round == BI_ROUND_RTZ) ? 1 : 0'

    if opts == ['rtn', 'rtp']:
        body.append('assert(ins->round == BI_ROUND_RTN || ins->round == BI_ROUND_RTP);')
        return '(ins->round == BI_ROUND_RTP) ? 1 : 0'

    if opts[0:4] == ['none', 'rtp', 'rtn', 'rtz']:
        return 'ins->round'

    assert False
|
||||
|
||||
# Likewise, matches our native format
|
||||
|
||||
def pack_clamp(mod, opts, body, pack_exprs):
    """Return the C expression encoding the clamp mode for an option list.

    The full four-entry list uses ins->clamp unchanged. The two-entry
    none/clamp_0_1 list appends a C assert restricting ins->clamp to body
    and yields a 0/1 selector. Any other list trips a generator-side
    assertion.
    """
    if opts == ['none', 'clamp_0_inf', 'clamp_m1_1', 'clamp_0_1']:
        return 'ins->clamp'

    if opts == ['none', 'clamp_0_1']:
        body.append('assert(ins->clamp == BI_CLAMP_NONE || ins->clamp == BI_CLAMP_CLAMP_0_1);')
        return '(ins->clamp == BI_CLAMP_CLAMP_0_1) ? 1 : 0'

    assert False
|
||||
|
||||
# Our modifiers match up in name, but there is no shortage of orders. So just
|
||||
# emit a table on the fly for it, since you won't get something much better.
|
||||
# ENUM_BI_COND must be kept synced with `enum bi_cond` in compiler.h
|
||||
|
||||
# Condition names in IR order; the position of each name is the value that
# indexes the generated table.
ENUM_BI_COND = ["al", "lt", "le", "ge", "gt", "eq", "ne"]

def pack_cmpf(mod, opts, body, pack_exprs):
    """Emit a C lookup table translating ins->cond into this instruction's
    cmpf encoding and return the expression that indexes it.

    Each ENUM_BI_COND entry maps to its position in opts, or to the invalid
    sentinel ~0 when the instruction does not offer that condition. mod and
    pack_exprs are unused.
    """
    entries = []
    for name in ENUM_BI_COND:
        entries.append(str(opts.index(name)) if name in opts else '~0')

    body.extend([
        'unsigned cmpf_table[] = {',
        ' ' + ', '.join(entries),
        '};',
    ])

    return 'cmpf_table[ins->cond]'
|
||||
|
||||
# Since our IR is explicitly typed, we look at the size/sign to determine sign
|
||||
# extension behaviour
|
||||
def pack_extend(mod, opts, body, pack_exprs):
    """Emit C deriving the extension mode from the modifier's source type.

    Two C locals are appended to body: <mod>_small (type size <= 16) and
    <mod>_signed (base type is nir_type_int). For none/sext/zext/reserved
    the result is 0 for non-small sources, else 1 (signed) or 2 (unsigned).
    For zext/sext a small source is asserted and the result is the sign
    flag. pack_exprs is unused.
    """
    src_type = 'ins->src_types[{}]'.format(mod_arg(mod))

    body.append('ASSERTED bool {}_small = nir_alu_type_get_type_size({}) <= 16;'.format(mod, src_type))
    body.append('bool {}_signed = nir_alu_type_get_base_type({}) == nir_type_int;'.format(mod, src_type))

    if opts != ['none', 'sext', 'zext', 'reserved']:
        # Two-option form: there is no "none", so the source must be small
        assert opts == ['zext', 'sext']
        body.append('assert({}_small);'.format(mod))
        return '{}_signed ? 1 : 0'.format(mod)

    return '{0}_small ? ({0}_signed ? 1 : 2) : 0'.format(mod)
|
||||
|
||||
def pack_not_src1(mod, opts, body, pack_exprs):
    """Return a C ternary on ins->bitwise.src1_invert selecting the index of
    'not' or 'none' within opts. mod, body and pack_exprs are unused."""
    inverted = opts.index('not')
    plain = opts.index('none')
    return 'ins->bitwise.src1_invert ? {} : {}'.format(inverted, plain)
|
||||
|
||||
def pack_not_result(mod, opts, body, pack_exprs):
    """Return a C ternary on ins->bitwise.dest_invert selecting the index of
    'not' or 'none' within opts. mod, body and pack_exprs are unused."""
    inverted = opts.index('not')
    plain = opts.index('none')
    return 'ins->bitwise.dest_invert ? {} : {}'.format(inverted, plain)
|
||||
|
||||
# Maps register format option names to the nir_alu_type that ins->format is
# compared against in the generated C.
REGISTER_FORMATS = {
    'f64': 'nir_type_float64',
    'f32': 'nir_type_float32',
    'f16': 'nir_type_float16',
    'u64': 'nir_type_uint64',
    'u32': 'nir_type_uint32',
    'u16': 'nir_type_uint16',
    'i64': 'nir_type_int64',
    's32': 'nir_type_int32',
    's16': 'nir_type_int16'
}

def pack_register_format(mod, opts, body, pack_exprs):
    """Emit an if/else-if chain mapping ins->format to its encoded value.

    mod        -- modifier name, used to name the C temporary <mod>_temp.
    opts       -- option names in encoding order. None and 'reserved' are
                  skipped, as are names missing from REGISTER_FORMATS.
                  'auto' may appear at most once and becomes the fallback.
    body       -- list of C source lines, appended to in place.
    pack_exprs -- unused.

    Returns the name of the C temporary. Raises AssertionError if 'auto'
    appears twice or if no option produced a branch.
    """
    body.append('unsigned {}_temp = 0;'.format(mod))

    first = True
    auto = None
    for i, op in enumerate(opts):
        if op is None or op == 'reserved':
            continue

        if op == 'auto':
            assert auto is None
            auto = i
            continue

        nir_type = REGISTER_FORMATS.get(op)
        if not nir_type:
            continue

        # Only a branch that is actually emitted may clear `first`.
        # Otherwise an unknown leading format would make the next branch
        # start with a dangling "else if".
        t_else = 'else ' if not first else ''
        first = False
        body.append('{}if (ins->format == {}) {}_temp = {};'.format(t_else, nir_type, mod, i))

    # The trailing else below needs at least one preceding if
    assert not first
    if auto is None:
        body.append('else unreachable("Could not pattern match register format");')
    else:
        body.append('else {}_temp = {};'.format(mod, auto))

    return mod + '_temp'
|
||||
|
||||
def pack_seg(mod, opts, body, pack_exprs):
    """Emit the encoding of the memory segment modifier.

    Two option layouts are accepted. With eight options, ins->segment is used
    directly and the generated C asserts it is non-zero. With exactly
    ['none', 'wls'], the generated C asserts the segment is one of those two
    and encodes WLS as 1, anything else as 0. Any other layout trips an
    assertion in the generator. mod and pack_exprs are not used.
    """
    if len(opts) == 8:
        check = 'assert(ins->segment);'
        body.append(check)
        return 'ins->segment'

    if opts == ['none', 'wls']:
        check = 'assert(ins->segment == BI_SEG_NONE || ins->segment == BI_SEG_WLS);'
        body.append(check)
        return 'ins->segment == BI_SEG_WLS ? 1 : 0'

    assert(False)
|
||||
|
||||
# Processes modifiers. If used directly, emits a pack. Otherwise, just
|
||||
# processes the value (grabbing it from the IR). This must sync with the IR.
|
||||
|
||||
def _emit(c_expr):
    """Build a packer that ignores its four arguments and yields c_expr."""
    return lambda mod, opts, body, pack_exprs: c_expr


# Modifier name -> packer. Every packer is called as
# fn(mod, opts, body, pack_exprs) and returns a C expression string.
modifier_map = {}

modifier_map.update((key, pack_widen) for key in [
    "widen", "widen0", "widen1",
    "lane", "lane0", "lane1", "lane2", "lane3",
    "lanes0", "lanes1", "lanes2",
    "swz", "swz0", "swz1", "swz2",
    "swap0", "swap1", "swap2",
    "replicate0", "replicate1",
])

modifier_map.update((key, pack_absneg) for key in [
    "abs", "abs0", "abs1", "abs2",
    "neg", "neg0", "neg1", "neg2",
])

modifier_map.update((key, pack_extend) for key in [
    "extend", "extend0", "extend1", "extend2",
    "sign0", "sign1",
])

modifier_map.update([
    ("clamp", pack_clamp),
    ("round", pack_round),
    ("cmpf", pack_cmpf),
    ("varying_name", _emit('ins->load_vary.var_id')),
    ("not1", pack_not_src1),
    ("not_result", pack_not_result),
    ("register_format", pack_register_format),
    ("seg", pack_seg),
    ("update", _emit('ins->load_vary.update_mode')),

    # Just a minus one modifier
    ("vecsize", _emit('ins->vector_channels - 1')),

    # 0: compute 1: zero
    ("lod_mode", _emit('1 - ins->texture.compute_lod')),
    ("skip", _emit('ins->skip')),

    # Not much choice in the matter...
    ("divzero", _emit('0')),
    ("sem", _emit('0')),  # IEEE 754 compliant NaN rules

    # For +ZS_EMIT, infer modifiers from specified sources
    ("z", _emit('(ins->src[0] != 0)')),
    ("stencil", _emit('(ins->src[1] != 0)')),

    # For +LD_VAR, infer sample from load_vary.interp_mode
    ("sample", _emit('ins->load_vary.interp_mode')),

    # +CLPER
    ("lane_op", _emit('ins->special.clper.lane_op_mod')),
    ("inactive_result", _emit('ins->special.clper.inactive_res')),

    # +CLPER and +WMASK
    ("subgroup", _emit('ins->special.subgroup_sz')),

    # We don't support these in the IR yet (TODO)
    ("saturate", _emit('0')),  # clamp to min/max int
    ("mask", _emit('0')),  # clz(~0) = ~0
    # #1, #1.0, ~0 for cmp
    ("result_type", lambda mod, opts, body, pack_exprs: str(opts.index('m1'))),
    ("special", _emit('0')),  # none, which source wins..
    ("offset", _emit('0')),  # sin/cos thing
    ("adj", _emit('0')),  # sin/cos thing
    ("sqrt", _emit('0')),  # sin/cos thing
    ("log", _emit('1')),  # frexpe mode -- TODO: other transcendentals for g71
    ("scale", _emit('0')),  # sin/cos thing
    ("precision", _emit('0')),  # log thing
    ("mode", _emit('0')),  # log thing
    ("func", _emit('0')),  # pow special case thing
    ("h", _emit('0')),  # VN_ASST1.f16
    ("l", _emit('0')),  # VN_ASST1.f16
    ("function", _emit('3')),  # LD_VAR_FLAT none
    ("preserve_null", _emit('0')),  # SEG_ADD none
    ("bytes2", _emit('0')),  # NIR shifts are in bits
    ("result_word", _emit('0')),  # 32-bit only shifts for now (TODO)
    ("source", _emit('7')),  # cycle_counter for LD_GCLK
    ("threads", _emit('0')),  # IMULD odd
    ("combine", _emit('0')),  # BRANCHC any
    ("format", _emit('1')),  # LEA_TEX_IMM u32
    ("test_mode", _emit('0')),  # JUMP_EX z
    ("stack_mode", _emit('2')),  # JUMP_EX none
    ("atom_opc", _emit('2')),  # ATOM_C aadd
    ("mux", _emit('1')),  # MUX int_zero
])
|
||||
|
||||
def pack_modifier(mod, width, default, opts, body, pack_exprs):
    """Emit C that computes the encoded value of one modifier.

    Looks up the packer for mod in modifier_map and appends to body a
    declaration `unsigned <mod> = <expr>;`. When the expression is not an
    integer constant that fits in `width` bits, a C assert bounding the value
    is appended as well, followed by a blank line.

    Returns None when no packer is registered for mod, True otherwise.
    default is not used here.
    """
    # Invoke the specific one
    fn = modifier_map.get(mod)

    if fn is None:
        return None

    expr = fn(mod, opts, body, pack_exprs)
    body.append('unsigned {} = {};'.format(mod, expr))

    # Validate we don't overflow. A constant known to fit needs no check;
    # anything else is checked in the generated C. This is done explicitly
    # rather than via `assert` under a bare `except`, which would hide
    # unrelated errors and skip the check entirely under `python -O`.
    limit = 1 << width

    try:
        constant = int(expr)
    except (TypeError, ValueError):
        constant = None

    if constant is None or constant >= limit:
        body.append('assert({} < {});'.format(mod, limit))

    body.append('')

    return True
|
||||
|
||||
# Compiles an S-expression (and/or/eq/neq, modifiers, `ordering`, immediates)
|
||||
# into a C boolean expression suitable to stick in an if-statement. Takes an
|
||||
# imm_map to map modifiers to immediate values, parametrized by the ctx that
|
||||
# we're looking up in (the first, non-immediate argument of the equality)
|
||||
|
||||
# S-expression operator -> C binary operator
SEXPR_BINARY = {
    "and": "&&",
    "or": "||",
    "eq": "==",
    "neq": "!="
}

# (operator, immediate) applied to `ordering` -> comparison of the sources
_ORDERING_TESTS = {
    ('eq', '#lt'): '(src0 < src1)',
    ('eq', '#eq'): '(src0 == src1)',
    ('eq', '#gt'): '(src0 > src1)',
    ('neq', '#lt'): '(src0 >= src1)',
    ('neq', '#eq'): '(src0 != src1)',
    ('neq', '#gt'): '(src0 <= src1)',
}

def compile_s_expr(expr, imm_map, ctx):
    """Compile an S-expression into a C boolean expression string.

    expr is either a string or a list `[op, arg, ...]`:
      - ['alias', inner] compiles to whatever inner compiles to;
      - [eq|neq, 'ordering', '#lt'|'#eq'|'#gt'] compiles to a direct
        comparison of src0 and src1;
      - any other list is joined with the C operator from SEXPR_BINARY and
        wrapped in parentheses, with its second element passed down as ctx;
      - a string starting with '#' is an immediate, looked up (without the
        '#') in imm_map[ctx];
      - any other string is returned unchanged.

    Raises KeyError for an unknown operator or immediate.
    """
    if expr[0] == 'alias':
        return compile_s_expr(expr[1], imm_map, ctx)

    if isinstance(expr, list):
        is_ordering = (len(expr) == 3 and expr[1] == 'ordering' and
                       isinstance(expr[0], str) and isinstance(expr[2], str))

        if is_ordering:
            direct = _ORDERING_TESTS.get((expr[0], expr[2]))

            if direct is not None:
                return direct

        sep = " {} ".format(SEXPR_BINARY[expr[0]])
        return "(" + sep.join([compile_s_expr(s, imm_map, expr[1]) for s in expr[1:]]) + ")"

    if expr[0] == '#':
        return str(imm_map[ctx][expr[1:]])

    return expr
|
||||
|
||||
# Packs a derived value. We just iterate through the possible choices and test
|
||||
# whether the encoding matches, and if so we use it.
|
||||
|
||||
def pack_derived(pos, exprs, imm_map, body, pack_exprs):
    """Emit C that computes the derived field located at bit `pos`.

    exprs lists one condition (or None) per possible encoding of the field.
    An if / else-if chain is appended to body that stores into
    `derived_<pos>` the index of the first condition that holds, ending in
    unreachable() when none does. The shifted field is then added to
    pack_exprs. At least one entry of exprs must be non-None.
    """
    body.append('unsigned derived_{} = 0;'.format(pos))

    branches = 0

    for value, expr in enumerate(exprs):
        if expr is None:
            continue

        keyword = 'if' if branches == 0 else 'else if'
        test = compile_s_expr(expr, imm_map, None)
        body.append('{} {} derived_{} = {};'.format(keyword, test, pos, value))
        branches += 1

    assert branches > 0

    body.append('else unreachable("No pattern match at pos {}");'.format(pos))
    body.append('')

    assert pos is not None
    pack_exprs.append('(derived_{} << {})'.format(pos, pos))
|
||||
|
||||
# Table mapping immediate names in the machine to expressions of `ins` to
|
||||
# lookup the value in the IR, performing adjustments as needed
|
||||
|
||||
IMMEDIATE_TABLE = dict(
    attribute_index = 'ins->attribute.index',
    varying_index = 'ins->texture.varying_index',
    index = 'ins->load_vary.index',
    texture_index = 'ins->texture.texture_index',
    sampler_index = 'ins->texture.sampler_index',

    # Bindless (flat addressing) mode for DTSEL_IMM
    table = '63',

    # Not supported in the IR (TODO)
    shift = '0',

    # WMASK
    fill = '0',
)
|
||||
|
||||
# Generates a routine to pack a single variant of a single instruction.
|
||||
# Template applies the needed formatting and combine to OR together all the
|
||||
# pack_exprs to avoid bit fields.
|
||||
#
|
||||
# Argument swapping is sensitive to the order of operations. Dependencies:
|
||||
# sources (RW), modifiers (RW), derived values (W). Hence we emit sources and
|
||||
# modifiers first, then perform a swap if necessary overwriting
|
||||
# sources/modifiers, and last calculate derived values and pack.
|
||||
|
||||
# Template for one generated C function named pan_pack_<name>. Variables it
# reads when rendered:
#   name         - suffix of the generated function name
#   common_body  - list of C statements emitted first, shared by all states
#   single_state - when true, the state's body and return are emitted
#                  unconditionally; otherwise each state is wrapped in an
#                  if / else-if on its condition, ending in unreachable()
#   states       - iterable of (pack_exprs, s_body, cond) tuples; each state
#                  returns its pack_exprs ORed together
# NOTE(review): the `% if` / `${...}` syntax looks like Mako; confirm against
# the Template import at the top of the file.
variant_template = Template("""static inline unsigned
|
||||
pan_pack_${name}(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
|
||||
{
|
||||
${"\\n".join([(" " + x) for x in common_body])}
|
||||
% if single_state:
|
||||
% for (pack_exprs, s_body, _) in states:
|
||||
${"\\n".join([" " + x for x in s_body + ["return {};".format( " | ".join(pack_exprs))]])}
|
||||
% endfor
|
||||
% else:
|
||||
% for i, (pack_exprs, s_body, cond) in enumerate(states):
|
||||
${'} else ' if i > 0 else ''}if ${cond} {
|
||||
${"\\n".join([" " + x for x in s_body + ["return {};".format(" | ".join(pack_exprs))]])}
|
||||
% endfor
|
||||
} else {
|
||||
unreachable("No matching state found in ${name}");
|
||||
}
|
||||
% endif
|
||||
}
|
||||
""")
|
||||
|
||||
def pack_variant(opname, states):
    """Generate the C packing routine for one instruction.

    states is a list of (condition, description) pairs, one per encoding of
    the instruction; each description is a dict read for the keys "exact",
    "staging", "srcs", "modifiers", "swaps", "immediates" and "derived".

    Returns the rendered C source of the routine, or None when the
    instruction uses a modifier or immediate this generator cannot pack.
    """
    # Expressions to be ORed together for the final pack, an array per state
    pack_exprs = [[hex(state[1]["exact"][1])] for state in states]

    # Computations which need to be done to encode first, across states
    common_body = []

    # Map from modifier names to a map from modifier values to encoded values
    # String -> { String -> Uint }. This can be shared across states since
    # modifiers are (except the pos values) constant across state.
    imm_map = {}

    # Pack sources. Offset over to deal with staging/immediate weirdness in our
    # IR (TODO: reorder sources upstream so this goes away). Note sources are
    # constant across states.
    staging = states[0][1].get("staging", "")
    offset = 1 if staging in ["r", "rw"] else 0

    pack_sources(states[0][1].get("srcs", []), common_body, pack_exprs, offset)

    modifiers_handled = []
    for st in states:
        for ((mod, _, width), default, opts) in st[1].get("modifiers", []):
            if mod in modifiers_handled:
                continue

            modifiers_handled.append(mod)

            if pack_modifier(mod, width, default, opts, common_body, pack_exprs) is None:
                return None

            imm_map[mod] = { x: y for y, x in enumerate(opts) }

    # Modifier positions are the one thing that varies per state
    for i, st in enumerate(states):
        for ((mod, pos, width), default, opts) in st[1].get("modifiers", []):
            if pos is not None:
                pack_exprs[i].append('({} << {})'.format(mod, pos))

    # Swaps and immediates are emitted once into the common body, taken from
    # the last state. This used to depend on the loop variable of the loop
    # above still being bound; the choice is now explicit.
    shared = states[-1][1]

    for ((src_a, src_b), cond, remap) in shared.get("swaps", []):
        # Figure out which vars to swap, in order to swap the arguments. This
        # always includes the sources themselves, and may include source
        # modifiers (with the same source indices). We swap based on which
        # matches A, this is arbitrary but if we swapped both nothing would end
        # up swapping at all since it would swap back.

        vars_to_swap = ['src']
        for ((mod, _, width), default, opts) in shared.get("modifiers", []):
            if mod[-1] in str(src_a):
                vars_to_swap.append(mod[0:-1])

        common_body.append('if {}'.format(compile_s_expr(cond, imm_map, None)) + ' {')

        # Emit the swaps. We use a temp, and wrap in a block to avoid naming
        # collisions with multiple swaps. {{Doubling}} to escape the format.

        for v in vars_to_swap:
            common_body.append(' {{ unsigned temp = {}{}; {}{} = {}{}; {}{} = temp; }}'.format(v, src_a, v, src_a, v, src_b, v, src_b))

        # Also, remap. Bidirectional swaps are explicit in the XML.
        for v in remap:
            maps = remap[v]
            imm = imm_map[v]

            for n, old in enumerate(maps):
                common_body.append(' {}if ({} == {}) {} = {};'.format('' if n == 0 else 'else ', v, imm[old], v, imm[maps[old]]))

        common_body.append('}')
        common_body.append('')

    for (name, pos, width) in shared.get("immediates", []):
        if name not in IMMEDIATE_TABLE:
            return None

        common_body.append('unsigned {} = {};'.format(name, IMMEDIATE_TABLE[name]))

        # Immediates sit at the same position in every state
        for exprs in pack_exprs:
            exprs.append('({} << {})'.format(name, pos))

    if staging == 'r':
        common_body.append('bi_read_staging_register(clause, ins);')
    elif staging == 'w':
        common_body.append('bi_write_staging_register(clause, ins);')
    elif staging == '':
        pass
    else:
        assert staging == 'rw'
        # XXX: register allocation requirement (!)
        common_body.append('bi_read_staging_register(clause, ins);')
        common_body.append('assert(ins->src[0] == ins->dest);')

    # After this, we have to branch off, since deriveds *do* vary based on state.
    state_body = [[] for s in states]

    for i, (_, st) in enumerate(states):
        for ((pos, width), exprs) in st.get("derived", []):
            pack_derived(pos, exprs, imm_map, state_body[i], pack_exprs[i])

    # How do we pick a state? Accumulate the conditions. A lone state has no
    # condition to compile; the template ignores it in that case.
    # (A former `state_conds == None` check could never be true since this is
    # always a list, so the assertion it guarded never ran; it is dropped.)
    if len(states) > 1:
        state_conds = [compile_s_expr(st[0], imm_map, None) for st in states]
    else:
        state_conds = [None]

    # Finally, we'll collect everything together
    return variant_template.render(name = opname_to_c(opname), states = zip(pack_exprs, state_body, state_conds), common_body = common_body, single_state = (len(states) == 1))
|
||||
|
||||
# Prologue of the generated header: copyright, include guard and includes
HEADER = COPYRIGHT + """#ifndef _BI_GENERATED_PACK_H
#define _BI_GENERATED_PACK_H

#include "compiler.h"
#include "bi_pack_helpers.h"

"""

print(HEADER)

packs = [pack_variant(e, instructions[e]) for e in instructions]
for p in packs:
    # pack_variant returns None for instructions it cannot pack; skip those
    # rather than printing the literal text "None" into the C header.
    if p is not None:
        print(p)

print("#endif")
|
|
@ -81,20 +81,6 @@ bi_packer_c = custom_target(
|
|||
depend_files : files('bifrost_isa.py'),
|
||||
)
|
||||
|
||||
# Generates bi_generated_pack.h by running gen_pack.py on ISA.xml with the
# Python interpreter and capturing its standard output as the header.
# bifrost_isa.py is listed as an extra file the target depends on.
bi_generated_pack_h = custom_target(
|
||||
'bi_generated_pack.h',
|
||||
input : ['gen_pack.py', 'ISA.xml'],
|
||||
output : 'bi_generated_pack.h',
|
||||
command : [prog_python, '@INPUT@'],
|
||||
capture : true,
|
||||
depend_files : files('bifrost_isa.py'),
|
||||
)
|
||||
|
||||
# Dependency object wrapping the generated bi_generated_pack.h: it carries
# the generated header as a source and adds this directory to the include path.
idep_bi_generated_pack_h = declare_dependency(
|
||||
sources : [bi_generated_pack_h],
|
||||
include_directories : include_directories('.'),
|
||||
)
|
||||
|
||||
bi_opcodes_h = custom_target(
|
||||
'bi_opcodes.h',
|
||||
input : ['bi_opcodes.h.py', 'ISA.xml'],
|
||||
|
@ -138,7 +124,7 @@ libpanfrost_bifrost = static_library(
|
|||
'panfrost_bifrost',
|
||||
[libpanfrost_bifrost_files, bifrost_nir_algebraic_c, bi_opcodes_c, bi_printer_c, bi_packer_c],
|
||||
include_directories : [inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_gallium_aux, inc_panfrost_hw],
|
||||
dependencies: [idep_nir, idep_bi_generated_pack_h, idep_bi_opcodes_h, idep_bi_builder_h],
|
||||
dependencies: [idep_nir, idep_bi_opcodes_h, idep_bi_builder_h],
|
||||
link_with: [libpanfrost_util, libpanfrost_bifrost_disasm],
|
||||
c_args : [no_override_init_args],
|
||||
gnu_symbol_visibility : 'hidden',
|
||||
|
|
Loading…
Reference in New Issue