From a1e150fc4d4e8fd1d3dce897a92e34521b64c294 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Wed, 16 Dec 2020 14:07:32 -0500 Subject: [PATCH] pan/bi: Remove old IR packs Signed-off-by: Alyssa Rosenzweig Part-of: --- src/panfrost/bifrost/bi_pack.c | 544 ------------------------------ src/panfrost/bifrost/gen_pack.py | 556 ------------------------------- src/panfrost/bifrost/meson.build | 16 +- 3 files changed, 1 insertion(+), 1115 deletions(-) delete mode 100644 src/panfrost/bifrost/gen_pack.py diff --git a/src/panfrost/bifrost/bi_pack.c b/src/panfrost/bifrost/bi_pack.c index 3b5d127569d..76afe3efcd2 100644 --- a/src/panfrost/bifrost/bi_pack.c +++ b/src/panfrost/bifrost/bi_pack.c @@ -23,13 +23,6 @@ #include "compiler.h" #include "bi_print.h" -#include "bi_generated_pack.h" - -#define RETURN_PACKED(str) { \ - uint64_t temp = 0; \ - memcpy(&temp, &str, sizeof(str)); \ - return temp; \ -} /* This file contains the final passes of the compiler. Running after * scheduling and RA, the IR is now finalized, so we need to emit it to actual @@ -388,543 +381,6 @@ bi_pack_registers(bi_registers regs) return packed; } -static unsigned -bi_pack_fma_special(bi_clause *clause, bi_instruction *ins, bi_registers *regs) -{ - switch (ins->op.special) { - case BI_SPECIAL_CUBEFACE1: - return pan_pack_fma_cubeface1(clause, ins, regs); - default: - unreachable("Unknown special op"); - } -} - -#define BI_PACK_SHIFT(name) \ -static unsigned \ -bi_pack_fma_ ## name(bi_clause *clause, bi_instruction *ins, bi_registers *regs) \ -{ \ - switch (nir_alu_type_get_type_size(ins->dest_type)) { \ - case 32: \ - return pan_pack_fma_ ## name ## _i32(clause, ins, regs); \ - case 16: \ - return pan_pack_fma_ ## name ## _v2i16(clause, ins, regs); \ - case 8: \ - return pan_pack_fma_ ## name ## _v4i8(clause, ins, regs); \ - default: \ - unreachable("Invalid dest size"); \ - } \ -} - -BI_PACK_SHIFT(rshift_and) -BI_PACK_SHIFT(lshift_and) -BI_PACK_SHIFT(rshift_or) -BI_PACK_SHIFT(lshift_or) -BI_PACK_SHIFT(rshift_xor) -BI_PACK_SHIFT(lshift_xor) -BI_PACK_SHIFT(arshift) - -static unsigned -bi_pack_fma_bitwise(bi_clause *clause, bi_instruction *ins, bi_registers *regs) -{ - switch (ins->op.bitwise) { - case BI_BITWISE_AND: - return ins->bitwise.rshift ? - bi_pack_fma_rshift_and(clause, ins, regs) : - bi_pack_fma_lshift_and(clause, ins, regs); - case BI_BITWISE_OR: - return ins->bitwise.rshift ? - bi_pack_fma_rshift_or(clause, ins, regs) : - bi_pack_fma_lshift_or(clause, ins, regs); - case BI_BITWISE_XOR: - return ins->bitwise.rshift ? - bi_pack_fma_rshift_xor(clause, ins, regs) : - bi_pack_fma_lshift_xor(clause, ins, regs); - case BI_BITWISE_ARSHIFT: - assert(ins->bitwise.rshift); - return bi_pack_fma_arshift(clause, ins, regs); - default: - unreachable("Invalid bitwise op"); - } -} - -static unsigned -pan_pack_fma(bi_clause *clause, bi_bundle bundle, bi_registers *regs) -{ - if (!bundle.fma) - return pan_pack_fma_nop_i32(clause, NULL, regs); - - bool f16 = bundle.fma->dest_type == nir_type_float16; - bool f32 = bundle.fma->dest_type == nir_type_float32; - bool u32 = bundle.fma->dest_type == nir_type_uint32 || - bundle.fma->dest_type == nir_type_bool32; - bool u16 = bundle.fma->dest_type == nir_type_uint16; - bool s32 = bundle.fma->dest_type == nir_type_int32; - bool s16 = bundle.fma->dest_type == nir_type_int16; - - bool src0_f16 = bundle.fma->src_types[0] == nir_type_float16; - bool src0_f32 = bundle.fma->src_types[0] == nir_type_float32; - bool src0_u16 = bundle.fma->src_types[0] == nir_type_uint16; - bool src0_s16 = bundle.fma->src_types[0] == nir_type_int16; - bool src0_s8 = bundle.fma->src_types[0] == nir_type_int8; - bool src0_u8 = bundle.fma->src_types[0] == nir_type_uint8; - - enum bi_cond cond = bundle.fma->cond; - bool typeless_cond = (cond == BI_COND_EQ) || (cond == BI_COND_NE); - - switch (bundle.fma->type) { - case BI_ADD: - if (bundle.fma->dest_type == nir_type_float32) - return pan_pack_fma_fadd_f32(clause, bundle.fma, regs); - else if (bundle.fma->dest_type == nir_type_float16) - return pan_pack_fma_fadd_v2f16(clause, bundle.fma, regs); - - unreachable("TODO"); - case BI_CMP: - assert (src0_f16 || src0_f32); - - if (src0_f32) - return pan_pack_fma_fcmp_f32(clause, bundle.fma, regs); - else - return pan_pack_fma_fcmp_v2f16(clause, bundle.fma, regs); - case BI_BITWISE: - return bi_pack_fma_bitwise(clause, bundle.fma, regs); - case BI_CONVERT: - if (src0_s8) { - assert(s32); - return pan_pack_fma_s8_to_s32(clause, bundle.fma, regs); - } else if (src0_u8) { - assert(u32); - return pan_pack_fma_u8_to_u32(clause, bundle.fma, regs); - } else if (src0_s16) { - assert(s32); - return pan_pack_fma_s16_to_s32(clause, bundle.fma, regs); - } else if (src0_u16) { - assert(u32); - return pan_pack_fma_u16_to_u32(clause, bundle.fma, regs); - } else if (src0_f16) { - assert(f32); - return pan_pack_fma_f16_to_f32(clause, bundle.fma, regs); - } else if (src0_f32) { - assert(f16); - return pan_pack_fma_v2f32_to_v2f16(clause, bundle.fma, regs); - } - - unreachable("Invalid FMA convert"); - case BI_CSEL: - if (f32) - return pan_pack_fma_csel_f32(clause, bundle.fma, regs); - else if (f16) - return pan_pack_fma_csel_v2f16(clause, bundle.fma, regs); - else if ((u32 || s32) && typeless_cond) - return pan_pack_fma_csel_i32(clause, bundle.fma, regs); - else if ((u16 || s16) && typeless_cond) - return pan_pack_fma_csel_v2i16(clause, bundle.fma, regs); - else if (u32) - return pan_pack_fma_csel_u32(clause, bundle.fma, regs); - else if (u16) - return pan_pack_fma_csel_v2u16(clause, bundle.fma, regs); - else if (s32) - return pan_pack_fma_csel_s32(clause, bundle.fma, regs); - else if (s16) - return pan_pack_fma_csel_v2s16(clause, bundle.fma, regs); - else - unreachable("Invalid csel type"); - case BI_FMA: - if (bundle.fma->dest_type == nir_type_float32) { - if (bundle.fma->op.mscale) - return pan_pack_fma_fma_rscale_f32(clause, bundle.fma, regs); - else - return pan_pack_fma_fma_f32(clause, bundle.fma, regs); - } else { - assert(bundle.fma->dest_type == nir_type_float16); - - if (bundle.fma->op.mscale) - return pan_pack_fma_fma_rscale_v2f16(clause, bundle.fma, regs); - else - return pan_pack_fma_fma_v2f16(clause, bundle.fma, regs); - } - case BI_FREXP: - assert(src0_f32 || src0_f16); - - if (src0_f32) - return pan_pack_fma_frexpe_f32(clause, bundle.fma, regs); - else - return pan_pack_fma_frexpe_v2f16(clause, bundle.fma, regs); - case BI_IMATH: - /* XXX: Only 32-bit, with carries/borrows forced */ - assert(s32 || u32); - - if (bundle.fma->op.imath == BI_IMATH_ADD) - return pan_pack_fma_iaddc_i32(clause, bundle.fma, regs); - else - return pan_pack_fma_isubb_i32(clause, bundle.fma, regs); - case BI_MOV: - return pan_pack_fma_mov_i32(clause, bundle.fma, regs); - case BI_SELECT: - if (nir_alu_type_get_type_size(bundle.fma->src_types[0]) == 16) { - return pan_pack_fma_mkvec_v2i16(clause, bundle.fma, regs); - } else { - assert(nir_alu_type_get_type_size(bundle.fma->src_types[0]) == 8); - return pan_pack_fma_mkvec_v4i8(clause, bundle.fma, regs); - } - case BI_ROUND: - assert(f16 || f32); - - if (f16) - return pan_pack_fma_fround_v2f16(clause, bundle.fma, regs); - else - return pan_pack_fma_fround_f32(clause, bundle.fma, regs); - case BI_REDUCE_FMA: - assert(src0_f32 && f32); - return pan_pack_fma_fadd_lscale_f32(clause, bundle.fma, regs); - case BI_IMUL: - return pan_pack_fma_imul_i32(clause, bundle.fma, regs); - case BI_SPECIAL_FMA: - return bi_pack_fma_special(clause, bundle.fma, regs); - default: - unreachable("Cannot encode class as FMA"); - } -} - -static unsigned -bi_pack_add_branch_cond(bi_instruction *ins, bi_registers *regs) -{ - assert(ins->cond == BI_COND_EQ); - assert(ins->src[1] == BIR_INDEX_ZERO); - - unsigned zero_ctrl = 0; - unsigned size = nir_alu_type_get_type_size(ins->src_types[0]); - - if (size == 16) { - /* See BR_SIZE_ZERO swizzle disassembly */ - zero_ctrl = ins->swizzle[0][0] ? 1 : 2; - } else { - assert(size == 32); - } - - /* EQ swap to NE */ - bool slot_swapped = false; - - struct bifrost_branch pack = { - .src0 = bi_get_src(ins, regs, 0), - .src1 = (zero_ctrl << 1) | !slot_swapped, - .cond = BR_COND_EQ, - .size = BR_SIZE_ZERO, - .op = BIFROST_ADD_OP_BRANCH - }; - - if (ins->branch_target) { - /* We assigned the constant slot to fetch the branch offset so - * we can just passthrough here. We put in the HI slot to match - * the blob since that's where the magic flags end up - */ - assert(!ins->src[2]); - pack.src2 = BIFROST_SRC_FAU_HI; - } else { - pack.src2 = bi_get_src(ins, regs, 2); - } - - RETURN_PACKED(pack); -} - -static unsigned -bi_pack_add_branch_uncond(bi_instruction *ins, bi_registers *regs) -{ - struct bifrost_branch pack = { - /* It's unclear what these bits actually mean */ - .src0 = BIFROST_SRC_FAU_LO, - .src1 = BIFROST_SRC_PASS_FMA, - - /* All ones in fact */ - .cond = (BR_ALWAYS & 0x7), - .size = (BR_ALWAYS >> 3), - .op = BIFROST_ADD_OP_BRANCH - }; - - if (ins->branch_target) { - /* Offset is passed as a PC-relative offset through an - * embedded constant. - */ - assert(!ins->src[2]); - pack.src2 = BIFROST_SRC_FAU_HI; - } else { - pack.src2 = bi_get_src(ins, regs, 2); - } - - RETURN_PACKED(pack); -} - -static unsigned -bi_pack_add_branch(bi_instruction *ins, bi_registers *regs) -{ - if (ins->cond == BI_COND_ALWAYS) - return bi_pack_add_branch_uncond(ins, regs); - else - return bi_pack_add_branch_cond(ins, regs); -} - -static unsigned -bi_pack_add_special(bi_clause *clause, bi_instruction *ins, bi_registers *regs) -{ - bool f16 = ins->dest_type == nir_type_float16; - - switch (ins->op.special) { - case BI_SPECIAL_FRCP: - return f16 ? pan_pack_add_frcp_f16(clause, ins, regs) : - pan_pack_add_frcp_f32(clause, ins, regs); - case BI_SPECIAL_FRSQ: - return f16 ? pan_pack_add_frsq_f16(clause, ins, regs) : - pan_pack_add_frsq_f32(clause, ins, regs); - case BI_SPECIAL_EXP2_LOW: - assert(!f16); - return pan_pack_add_fexp_f32(clause, ins, regs); - case BI_SPECIAL_IABS: - assert(ins->src_types[0] == nir_type_int32); - return pan_pack_add_iabs_s32(clause, ins, regs); - case BI_SPECIAL_CUBEFACE2: - return pan_pack_add_cubeface2(clause, ins, regs); - case BI_SPECIAL_CUBE_SSEL: - return pan_pack_add_cube_ssel(clause, ins, regs); - case BI_SPECIAL_CUBE_TSEL: - return pan_pack_add_cube_tsel(clause, ins, regs); - case BI_SPECIAL_CLPER_V6: - return pan_pack_add_clper_v6_i32(clause, ins, regs); - case BI_SPECIAL_CLPER_V7: - return pan_pack_add_clper_v7_i32(clause, ins, regs); - default: - unreachable("Unknown special op"); - } -} - -static unsigned -bi_pack_add_ld_var(bi_clause *clause, bi_instruction *ins, bi_registers *regs) -{ - if (ins->load_vary.special) - return pan_pack_add_ld_var_special(clause, ins, regs); - - if (ins->load_vary.flat) { - return ins->load_vary.immediate ? - pan_pack_add_ld_var_flat_imm(clause, ins, regs) : - pan_pack_add_ld_var_flat(clause, ins, regs); - } - - return ins->load_vary.immediate ? - pan_pack_add_ld_var_imm(clause, ins, regs) : - pan_pack_add_ld_var(clause, ins, regs); -} - -static unsigned -pan_pack_add(bi_clause *clause, bi_bundle bundle, bi_registers *regs, gl_shader_stage stage) -{ - if (!bundle.add) - return pan_pack_add_nop_i32(clause, NULL, regs); - - bool f16 = bundle.add->dest_type == nir_type_float16; - bool f32 = bundle.add->dest_type == nir_type_float32; - bool u32 = bundle.add->dest_type == nir_type_uint32 || - bundle.add->dest_type == nir_type_bool32; - bool u16 = bundle.add->dest_type == nir_type_uint16; - bool s32 = bundle.add->dest_type == nir_type_int32; - bool s16 = bundle.add->dest_type == nir_type_int16; - - bool src0_f16 = bundle.add->src_types[0] == nir_type_float16; - bool src0_f32 = bundle.add->src_types[0] == nir_type_float32; - bool src0_u32 = bundle.add->src_types[0] == nir_type_uint32; - bool src0_u16 = bundle.add->src_types[0] == nir_type_uint16; - bool src0_u8 = bundle.add->src_types[0] == nir_type_uint8; - bool src0_s32 = bundle.add->src_types[0] == nir_type_int32; - bool src0_s16 = bundle.add->src_types[0] == nir_type_int16; - bool src0_s8 = bundle.add->src_types[0] == nir_type_int8; - - unsigned sz = nir_alu_type_get_type_size(bundle.add->dest_type); - enum bi_cond cond = bundle.add->cond; - bool typeless_cond = (cond == BI_COND_EQ) || (cond == BI_COND_NE); - - switch (bundle.add->type) { - case BI_ADD: - if (bundle.add->dest_type == nir_type_float32) - return pan_pack_add_fadd_f32(clause, bundle.add, regs); - else if (bundle.add->dest_type == nir_type_float16) - return pan_pack_add_fadd_v2f16(clause, bundle.add, regs); - - unreachable("TODO"); - case BI_ATEST: - return pan_pack_add_atest(clause, bundle.add, regs); - case BI_BRANCH: - return bi_pack_add_branch(bundle.add, regs); - case BI_CMP: - if (src0_f32) - return pan_pack_add_fcmp_f32(clause, bundle.add, regs); - else if (src0_f16) - return pan_pack_add_fcmp_v2f16(clause, bundle.add, regs); - else if ((src0_u32 || src0_s32) && typeless_cond) - return pan_pack_add_icmp_i32(clause, bundle.add, regs); - else if ((src0_u16 || src0_s16) && typeless_cond) - return pan_pack_add_icmp_v2i16(clause, bundle.add, regs); - else if ((src0_u8 || src0_s8) && typeless_cond) - return pan_pack_add_icmp_v4i8(clause, bundle.add, regs); - else if (src0_u32) - return pan_pack_add_icmp_u32(clause, bundle.add, regs); - else if (src0_u16) - return pan_pack_add_icmp_v2u16(clause, bundle.add, regs); - else if (src0_u8) - return pan_pack_add_icmp_v4u8(clause, bundle.add, regs); - else if (src0_s32) - return pan_pack_add_icmp_s32(clause, bundle.add, regs); - else if (src0_s16) - return pan_pack_add_icmp_v2s16(clause, bundle.add, regs); - else if (src0_s8) - return pan_pack_add_icmp_v4s8(clause, bundle.add, regs); - else - unreachable("Invalid cmp type"); - case BI_BLEND: - return pan_pack_add_blend(clause, bundle.add, regs); - case BI_BITWISE: - unreachable("Packing todo"); - case BI_CONVERT: - if (src0_f16 && s16) - return pan_pack_add_v2f16_to_v2s16(clause, bundle.add, regs); - else if (src0_f16 && u16) - return pan_pack_add_v2f16_to_v2u16(clause, bundle.add, regs); - else if (src0_f16 && s32) - return pan_pack_add_f16_to_s32(clause, bundle.add, regs); - else if (src0_f16 && u32) - return pan_pack_add_f16_to_u32(clause, bundle.add, regs); - else if (src0_s16 && f16) - return pan_pack_add_v2s16_to_v2f16(clause, bundle.add, regs); - else if (src0_u16 && f16) - return pan_pack_add_v2u16_to_v2f16(clause, bundle.add, regs); - else if (src0_s8 && s16) - return pan_pack_add_v2s8_to_v2s16(clause, bundle.add, regs); - else if (src0_u8 && u16) - return pan_pack_add_v2u8_to_v2u16(clause, bundle.add, regs); - else if (src0_s8 && f16) - return pan_pack_add_v2s8_to_v2f16(clause, bundle.add, regs); - else if (src0_u8 && f16) - return pan_pack_add_v2u8_to_v2f16(clause, bundle.add, regs); - else if (src0_f32 && s32) - return pan_pack_add_f32_to_s32(clause, bundle.add, regs); - else if (src0_f32 && u32) - return pan_pack_add_f32_to_u32(clause, bundle.add, regs); - else if (src0_s8 && s32) - return pan_pack_add_s8_to_s32(clause, bundle.add, regs); - else if (src0_u8 && u32) - return pan_pack_add_u8_to_u32(clause, bundle.add, regs); - else if (src0_s8 && f32) - return pan_pack_add_s8_to_f32(clause, bundle.add, regs); - else if (src0_u8 && f32) - return pan_pack_add_u8_to_f32(clause, bundle.add, regs); - else if (src0_s32 && f32) - return pan_pack_add_s32_to_f32(clause, bundle.add, regs); - else if (src0_u32 && f32) - return pan_pack_add_u32_to_f32(clause, bundle.add, regs); - else if (src0_s16 && s32) - return pan_pack_add_s16_to_s32(clause, bundle.add, regs); - else if (src0_u16 && u32) - return pan_pack_add_u16_to_u32(clause, bundle.add, regs); - else if (src0_s16 && f32) - return pan_pack_add_s16_to_f32(clause, bundle.add, regs); - else if (src0_u16 && f32) - return pan_pack_add_u16_to_f32(clause, bundle.add, regs); - else if (src0_f16 && f32) - return pan_pack_add_f16_to_f32(clause, bundle.add, regs); - else if (src0_f32 && f16) - return pan_pack_add_v2f32_to_v2f16(clause, bundle.add, regs); - else - unreachable("Invalid ADD convert"); - case BI_DISCARD: - return pan_pack_add_discard_f32(clause, bundle.add, regs); - case BI_FREXP: - unreachable("Packing todo"); - case BI_IMATH: - assert(sz == 8 || sz == 16 || sz == 32); - - if (bundle.add->op.imath == BI_IMATH_ADD) { - return (sz == 8) ? pan_pack_add_iadd_v4s8(clause, bundle.add, regs) : - (sz == 16) ? pan_pack_add_iadd_v2s16(clause, bundle.add, regs) : - pan_pack_add_iadd_s32(clause, bundle.add, regs); - } else { - return (sz == 8) ? pan_pack_add_isub_v4s8(clause, bundle.add, regs) : - (sz == 16) ? pan_pack_add_isub_v2s16(clause, bundle.add, regs) : - pan_pack_add_isub_s32(clause, bundle.add, regs); - } - case BI_LOAD_ATTR: - return bundle.add->attribute.immediate ? - pan_pack_add_ld_attr_imm(clause, bundle.add, regs) : - pan_pack_add_ld_attr(clause, bundle.add, regs); - case BI_LOAD: - case BI_LOAD_UNIFORM: - assert(u32 || s32 || f32); - switch (bundle.add->vector_channels) { - case 1: return pan_pack_add_load_i32(clause, bundle.add, regs); - case 2: return pan_pack_add_load_i64(clause, bundle.add, regs); - case 3: return pan_pack_add_load_i96(clause, bundle.add, regs); - case 4: return pan_pack_add_load_i128(clause, bundle.add, regs); - default: unreachable("Invalid channel count"); - } - case BI_LOAD_VAR: - return bi_pack_add_ld_var(clause, bundle.add, regs); - case BI_LOAD_VAR_ADDRESS: - return bundle.add->attribute.immediate ? - pan_pack_add_lea_attr_imm(clause, bundle.add, regs) : - pan_pack_add_lea_attr(clause, bundle.add, regs); - case BI_LOAD_TILE: - return pan_pack_add_ld_tile(clause, bundle.add, regs); - case BI_MINMAX: - if (bundle.add->op.minmax == BI_MINMAX_MIN) { - if (bundle.add->dest_type == nir_type_float32) - return pan_pack_add_fmin_f32(clause, bundle.add, regs); - else if (bundle.add->dest_type == nir_type_float16) - return pan_pack_add_fmin_v2f16(clause, bundle.add, regs); - unreachable("TODO"); - } else { - if (bundle.add->dest_type == nir_type_float32) - return pan_pack_add_fmax_f32(clause, bundle.add, regs); - else if (bundle.add->dest_type == nir_type_float16) - return pan_pack_add_fmax_v2f16(clause, bundle.add, regs); - unreachable("TODO"); - } - case BI_MOV: - unreachable("Packing todo"); - case BI_STORE: - assert(src0_u32 || src0_s32 || src0_f32); - switch (bundle.add->vector_channels) { - case 1: return pan_pack_add_store_i32(clause, bundle.add, regs); - case 2: return pan_pack_add_store_i64(clause, bundle.add, regs); - case 3: return pan_pack_add_store_i96(clause, bundle.add, regs); - case 4: return pan_pack_add_store_i128(clause, bundle.add, regs); - default: unreachable("Invalid channel count"); - } - case BI_STORE_VAR: - return pan_pack_add_st_cvt(clause, bundle.add, regs); - case BI_SPECIAL_ADD: - return bi_pack_add_special(clause, bundle.add, regs); - case BI_TABLE: - assert(bundle.add->dest_type == nir_type_float32); - return pan_pack_add_flogd_f32(clause, bundle.add, regs); - case BI_SELECT: - assert(nir_alu_type_get_type_size(bundle.add->src_types[0]) == 16); - return pan_pack_add_mkvec_v2i16(clause, bundle.add, regs); - case BI_TEXC: - return pan_pack_add_texc(clause, bundle.add, regs); - case BI_TEXC_DUAL: - unreachable("Packing todo"); - case BI_TEXS: - assert(f16 || f32); - - if (f16) - return pan_pack_add_texs_2d_f16(clause, bundle.add, regs); - else - return pan_pack_add_texs_2d_f32(clause, bundle.add, regs); -case BI_ROUND: - unreachable("Packing todo"); - case BI_ZS_EMIT: - return pan_pack_add_zs_emit(clause, bundle.add, regs); - default: - unreachable("Cannot encode class as ADD"); - } -} - struct bi_packed_bundle { uint64_t lo; uint64_t hi; diff --git a/src/panfrost/bifrost/gen_pack.py b/src/panfrost/bifrost/gen_pack.py deleted file mode 100644 index 7a0b71d9445..00000000000 --- a/src/panfrost/bifrost/gen_pack.py +++ /dev/null @@ -1,556 +0,0 @@ -# -# Copyright (C) 2020 Collabora, Ltd. -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice (including the next -# paragraph) shall be included in all copies or substantial portions of the -# Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -# IN THE SOFTWARE. - -import sys -from bifrost_isa import parse_instructions, opname_to_c, COPYRIGHT -from mako.template import Template - -instructions = parse_instructions(sys.argv[1]) - -# Packs sources into an argument. Offset argument to work around a quirk of our -# compiler IR when dealing with staging registers (TODO: reorder in the IR to -# fix this) -def pack_sources(sources, body, pack_exprs, offset): - for i, src in enumerate(sources): - body.append('unsigned src{} = bi_get_src(ins, regs, {});'.format(i, i + offset)) - - # Validate the source - if src[1] != 0xFF: - body.append('assert((1 << src{}) & {});'.format(i, hex(src[1]))) - - # Sources are state-invariant - for state in pack_exprs: - state.append('(src{} << {})'.format(i, src[0])) - - body.append('') - -# Gets the argument that the source modifier applies to from the name if -# applicable, otherwise defaults to the first argument - -def mod_arg(mod): - return int(mod[-1]) if mod[-1] in "0123" else 0 - -# Widen/lane/swz/swap/replicate modifiers conceptually act as a combined extend -# + swizzle. We look at the size of the argument to determine if we apply -# them, and look at the swizzle to pick which one. - -def pack_widen(mod, opts, body, pack_exprs): - marg = mod_arg(mod) - - body.append('unsigned {}_sz = nir_alu_type_get_type_size(ins->src_types[{}]);'.format(mod, mod_arg(mod))) - body.append('unsigned {}_temp = 0;'.format(mod)) - - first = True - for i, op in enumerate(opts): - if op is None or op == 'reserved': - continue - - t_else = 'else ' if not first else '' - first = False - - if op in ['none', 'w0']: - body.append('{}if ({}_sz == 32) {}_temp = {};'.format(t_else, mod, mod, i)) - elif op == 'd0': - body.append('{}if ({}_sz == 64) {}_temp = {};'.format(t_else, mod, mod, i)) - else: - assert(op[0] in ['h', 'b']) - sz = 16 if op[0] == 'h' else 8 - - # Condition on the swizzle - conds = ['(ins->swizzle[{}][{}] % 4) == {}'.format(marg, idx, lane) for idx, lane in enumerate(op[1:])] - cond = " && ".join(conds) - - body.append('{}if ({}_sz == {} && {}) {}_temp = {};'.format(t_else, mod, sz, cond, mod, i)) - body.append('else unreachable("Could not pattern match widen");') - - return mod + '_temp' - -# abs/neg are stored in ins->src_{abs,neg}[src] arrays -def pack_absneg(mod, opts, body, pack_exprs): - return 'ins->src_{}[{}]'.format(mod[0:-1] if mod[-1] in "0123" else mod, mod_arg(mod)) - -# ins->round is the native format (RTE/RTP/RTN/RTZ) for most ops. But there -# are some others we might encounter that we don't support in the IR at this -# point, and there are a few that force a subset of round modes. - -def pack_round(mod, opts, body, pack_exprs): - if opts == ['none', 'rtz']: - body.append('assert(ins->round == BI_ROUND_NONE || ins->round == BI_ROUND_RTZ);') - return '(ins->round == BI_ROUND_RTZ) ? 1 : 0' - elif opts == ['rtn', 'rtp']: - body.append('assert(ins->round == BI_ROUND_RTN || ins->round == BI_ROUND_RTP);') - return '(ins->round == BI_ROUND_RTP) ? 1 : 0' - elif opts[0:4] == ['none', 'rtp', 'rtn', 'rtz']: - return 'ins->round' - else: - assert False - -# Likewise, matches our native format - -def pack_clamp(mod, opts, body, pack_exprs): - if opts == ['none', 'clamp_0_inf', 'clamp_m1_1', 'clamp_0_1']: - return 'ins->clamp' - elif opts == ['none', 'clamp_0_1']: - body.append('assert(ins->clamp == BI_CLAMP_NONE || ins->clamp == BI_CLAMP_CLAMP_0_1);') - return '(ins->clamp == BI_CLAMP_CLAMP_0_1) ? 1 : 0' - else: - assert False - -# Our modifiers match up in name, but there is no shortage of orders. So just -# emit a table on the fly for it, since you won't get something much better. -# ENUM_BI_COND must be kept synced with `enum bi_cond` in compiler.h - -ENUM_BI_COND = [ - "al", - "lt", - "le", - "ge", - "gt", - "eq", - "ne", -] - -def pack_cmpf(mod, opts, body, pack_exprs): - # Generate a table mapping ENUM_BI_COND to opts, or an invalid - # sentintel if not used (which will then be asserted out in a debug build). - table = [str(opts.index(x)) if x in opts else '~0' for x in ENUM_BI_COND] - - body.append('unsigned cmpf_table[] = {') - body.append(' ' + ', '.join(table)) - body.append('};') - - return 'cmpf_table[ins->cond]' - -# Since our IR is explicitly typed, we look at the size/sign to determine sign -# extension behaviour -def pack_extend(mod, opts, body, pack_exprs): - body.append('ASSERTED bool {}_small = nir_alu_type_get_type_size(ins->src_types[{}]) <= 16;'.format(mod, mod_arg(mod))) - body.append('bool {}_signed = nir_alu_type_get_base_type(ins->src_types[{}]) == nir_type_int;'.format(mod, mod_arg(mod))) - - if opts == ['none', 'sext', 'zext', 'reserved']: - return '{}_small ? ({}_signed ? 1 : 2) : 0'.format(mod, mod) - else: - assert opts == ['zext', 'sext'] - body.append('assert({}_small);'.format(mod)) - return '{}_signed ? 1 : 0'.format(mod) - -def pack_not_src1(mod, opts, body, pack_exprs): - return 'ins->bitwise.src1_invert ? {} : {}'.format(opts.index('not'), opts.index('none')) - -def pack_not_result(mod, opts, body, pack_exprs): - return 'ins->bitwise.dest_invert ? {} : {}'.format(opts.index('not'), opts.index('none')) - -REGISTER_FORMATS = { - 'f64': 'nir_type_float64', - 'f32': 'nir_type_float32', - 'f16': 'nir_type_float16', - 'u64': 'nir_type_uint64', - 'u32': 'nir_type_uint32', - 'u16': 'nir_type_uint16', - 'i64': 'nir_type_int64', - 's32': 'nir_type_int32', - 's16': 'nir_type_int16' -} - -def pack_register_format(mod, opts, body, pack_exprs): - body.append('unsigned {}_temp = 0;'.format(mod)) - - first = True - auto = None - for i, op in enumerate(opts): - if op is None or op == 'reserved': - continue - - if op == 'auto': - assert(auto == None) - auto = i - continue - - t_else = 'else ' if not first else '' - first = False - nir_type = REGISTER_FORMATS.get(op) - - if nir_type: - body.append('{}if (ins->format == {}) {}_temp = {};'.format(t_else, nir_type, mod, i)) - - assert not first - if auto is None: - body.append('else unreachable("Could not pattern match register format");') - else: - body.append('else {}_temp = {};'.format(mod, auto)) - return mod + '_temp' - -def pack_seg(mod, opts, body, pack_exprs): - if len(opts) == 8: - body.append('assert(ins->segment);') - return 'ins->segment' - elif opts == ['none', 'wls']: - body.append('assert(ins->segment == BI_SEG_NONE || ins->segment == BI_SEG_WLS);') - return 'ins->segment == BI_SEG_WLS ? 1 : 0' - else: - assert(False) - -# Processes modifiers. If used directly, emits a pack. Otherwise, just -# processes the value (grabbing it from the IR). This must sync with the IR. - -modifier_map = { - "widen": pack_widen, - "widen0": pack_widen, - "widen1": pack_widen, - "lane": pack_widen, - "lane0": pack_widen, - "lane1": pack_widen, - "lane2": pack_widen, - "lane3": pack_widen, - "lanes0": pack_widen, - "lanes1": pack_widen, - "lanes2": pack_widen, - "swz": pack_widen, - "swz0": pack_widen, - "swz1": pack_widen, - "swz2": pack_widen, - "swap0": pack_widen, - "swap1": pack_widen, - "swap2": pack_widen, - "replicate0": pack_widen, - "replicate1": pack_widen, - - "abs": pack_absneg, - "abs0": pack_absneg, - "abs1": pack_absneg, - "abs2": pack_absneg, - "neg": pack_absneg, - "neg0": pack_absneg, - "neg1": pack_absneg, - "neg2": pack_absneg, - - "extend": pack_extend, - "extend0": pack_extend, - "extend1": pack_extend, - "extend2": pack_extend, - "sign0": pack_extend, - "sign1": pack_extend, - - "clamp": pack_clamp, - "round": pack_round, - "cmpf": pack_cmpf, - "varying_name": lambda a,b,c,d: 'ins->load_vary.var_id', - "not1": pack_not_src1, - "not_result": pack_not_result, - "register_format": pack_register_format, - "seg": pack_seg, - "update": lambda a,b,c,d: 'ins->load_vary.update_mode', - - # Just a minus one modifier - "vecsize": lambda a,b,c,d: 'ins->vector_channels - 1', - - # 0: compute 1: zero - "lod_mode": lambda a,b,c,d: '1 - ins->texture.compute_lod', - "skip": lambda a,b,c,d: 'ins->skip', - - # Not much choice in the matter... - "divzero": lambda a,b,c,d: '0', - "sem": lambda a,b,c,d: '0', # IEEE 754 compliant NaN rules - - # For +ZS_EMIT, infer modifiers from specified sources - "z": lambda a,b,c,d: '(ins->src[0] != 0)', - "stencil": lambda a,b,c,d: '(ins->src[1] != 0)', - - # For +LD_VAR, infer sample from load_vary.interp_mode - "sample": lambda a,b,c,d: 'ins->load_vary.interp_mode', - - # +CLPER - "lane_op": lambda a,b,c,d: 'ins->special.clper.lane_op_mod', - "inactive_result": lambda a,b,c,d: 'ins->special.clper.inactive_res', - - # +CLPER and +WMASK - "subgroup": lambda a,b,c,d: 'ins->special.subgroup_sz', - - # We don't support these in the IR yet (TODO) - "saturate": lambda a,b,c,d: '0', # clamp to min/max int - "mask": lambda a,b,c,d: '0', # clz(~0) = ~0 - "result_type": lambda a,opts,c,d: str(opts.index('m1')), # #1, #1.0, ~0 for cmp - "special": lambda a,b,c,d: '0', # none, which source wins.. - "offset": lambda a,b,c,d: '0', # sin/cos thing - "adj": lambda a,b,c,d: '0', # sin/cos thing - "sqrt": lambda a,b,c,d: '0', # sin/cos thing - "log": lambda a,b,c,d: '1', # frexpe mode -- TODO: other transcendentals for g71 - "scale": lambda a,b,c,d: '0', # sin/cos thing - "precision": lambda a,b,c,d: '0', # log thing - "mode": lambda a,b,c,d: '0', # log thing - "func": lambda a,b,c,d: '0', # pow special case thing - "h": lambda a,b,c,d: '0', # VN_ASST1.f16 - "l": lambda a,b,c,d: '0', # VN_ASST1.f16 - "function": lambda a,b,c,d: '3', # LD_VAR_FLAT none - "preserve_null": lambda a,b,c,d: '0', # SEG_ADD none - "bytes2": lambda a,b,c,d: '0', # NIR shifts are in bits - "result_word": lambda a,b,c,d: '0', # 32-bit only shifts for now (TODO) - "source": lambda a,b,c,d: '7', # cycle_counter for LD_GCLK - "threads": lambda a,b,c,d: '0', # IMULD odd - "combine": lambda a,b,c,d: '0', # BRANCHC any - "format": lambda a,b,c,d: '1', # LEA_TEX_IMM u32 - "test_mode": lambda a,b,c,d: '0', # JUMP_EX z - "stack_mode": lambda a,b,c,d: '2', # JUMP_EX none - "atom_opc": lambda a,b,c,d: '2', # ATOM_C aadd - "mux": lambda a,b,c,d: '1', # MUX int_zero -} - -def pack_modifier(mod, width, default, opts, body, pack_exprs): - # Invoke the specific one - fn = modifier_map.get(mod) - - if fn is None: - return None - - expr = fn(mod, opts, body, pack_exprs) - body.append('unsigned {} = {};'.format(mod, expr)) - - # Validate we don't overflow - try: - assert(int(expr) < (1 << width)) - except: - body.append('assert({} < {});'.format(mod, (1 << width))) - - body.append('') - - return True - -# Compiles an S-expression (and/or/eq/neq, modifiers, `ordering`, immediates) -# into a C boolean expression suitable to stick in an if-statement. Takes an -# imm_map to map modifiers to immediate values, parametrized by the ctx that -# we're looking up in (the first, non-immediate argument of the equality) - -SEXPR_BINARY = { - "and": "&&", - "or": "||", - "eq": "==", - "neq": "!=" -} - -def compile_s_expr(expr, imm_map, ctx): - if expr[0] == 'alias': - return compile_s_expr(expr[1], imm_map, ctx) - elif expr == ['eq', 'ordering', '#gt']: - return '(src0 > src1)' - elif expr == ['neq', 'ordering', '#lt']: - return '(src0 >= src1)' - elif expr == ['neq', 'ordering', '#gt']: - return '(src0 <= src1)' - elif expr == ['eq', 'ordering', '#lt']: - return '(src0 < src1)' - elif expr == ['eq', 'ordering', '#eq']: - return '(src0 == src1)' - elif isinstance(expr, list): - sep = " {} ".format(SEXPR_BINARY[expr[0]]) - return "(" + sep.join([compile_s_expr(s, imm_map, expr[1]) for s in expr[1:]]) + ")" - elif expr[0] == '#': - return str(imm_map[ctx][expr[1:]]) - else: - return expr - -# Packs a derived value. We just iterate through the possible choices and test -# whether the encoding matches, and if so we use it. - -def pack_derived(pos, exprs, imm_map, body, pack_exprs): - body.append('unsigned derived_{} = 0;'.format(pos)) - - first = True - for i, expr in enumerate(exprs): - if expr is not None: - cond = compile_s_expr(expr, imm_map, None) - body.append('{}if {} derived_{} = {};'.format('' if first else 'else ', cond, pos, i)) - first = False - - assert (not first) - body.append('else unreachable("No pattern match at pos {}");'.format(pos)) - body.append('') - - assert(pos is not None) - pack_exprs.append('(derived_{} << {})'.format(pos, pos)) - -# Table mapping immediate names in the machine to expressions of `ins` to -# lookup the value in the IR, performing adjustments as needed - -IMMEDIATE_TABLE = { - 'attribute_index': 'ins->attribute.index', - 'varying_index': 'ins->texture.varying_index', - 'index': 'ins->load_vary.index', - 'texture_index': 'ins->texture.texture_index', - 'sampler_index': 'ins->texture.sampler_index', - 'table': '63', # Bindless (flat addressing) mode for DTSEL_IMM - - # Not supported in the IR (TODO) - 'shift': '0', - 'fill': '0', # WMASK -} - -# Generates a routine to pack a single variant of a single- instruction. -# Template applies the needed formatting and combine to OR together all the -# pack_exprs to avoid bit fields. -# -# Argument swapping is sensitive to the order of operations. Dependencies: -# sources (RW), modifiers (RW), derived values (W). Hence we emit sources and -# modifiers first, then perform a swap if necessary overwriting -# sources/modifiers, and last calculate derived values and pack. - -variant_template = Template("""static inline unsigned -pan_pack_${name}(bi_clause *clause, bi_instruction *ins, bi_registers *regs) -{ -${"\\n".join([(" " + x) for x in common_body])} -% if single_state: -% for (pack_exprs, s_body, _) in states: -${"\\n".join([" " + x for x in s_body + ["return {};".format( " | ".join(pack_exprs))]])} -% endfor -% else: -% for i, (pack_exprs, s_body, cond) in enumerate(states): - ${'} else ' if i > 0 else ''}if ${cond} { -${"\\n".join([" " + x for x in s_body + ["return {};".format(" | ".join(pack_exprs))]])} -% endfor - } else { - unreachable("No matching state found in ${name}"); - } -% endif -} -""") - -def pack_variant(opname, states): - # Expressions to be ORed together for the final pack, an array per state - pack_exprs = [[hex(state[1]["exact"][1])] for state in states] - - # Computations which need to be done to encode first, across states - common_body = [] - - # Map from modifier names to a map from modifier values to encoded values - # String -> { String -> Uint }. This can be shared across states since - # modifiers are (except the pos values) constant across state. - imm_map = {} - - # Pack sources. Offset over to deal with staging/immediate weirdness in our - # IR (TODO: reorder sources upstream so this goes away). Note sources are - # constant across states. - staging = states[0][1].get("staging", "") - offset = 0 - if staging in ["r", "rw"]: - offset += 1 - - pack_sources(states[0][1].get("srcs", []), common_body, pack_exprs, offset) - - modifiers_handled = [] - for st in states: - for ((mod, _, width), default, opts) in st[1].get("modifiers", []): - if mod in modifiers_handled: - continue - - modifiers_handled.append(mod) - - if pack_modifier(mod, width, default, opts, common_body, pack_exprs) is None: - return None - - imm_map[mod] = { x: y for y, x in enumerate(opts) } - - for i, st in enumerate(states): - for ((mod, pos, width), default, opts) in st[1].get("modifiers", []): - if pos is not None: - pack_exprs[i].append('({} << {})'.format(mod, pos)) - - for ((src_a, src_b), cond, remap) in st[1].get("swaps", []): - # Figure out which vars to swap, in order to swap the arguments. This - # always includes the sources themselves, and may include source - # modifiers (with the same source indices). We swap based on which - # matches A, this is arbitrary but if we swapped both nothing would end - # up swapping at all since it would swap back. - - vars_to_swap = ['src'] - for ((mod, _, width), default, opts) in st[1].get("modifiers", []): - if mod[-1] in str(src_a): - vars_to_swap.append(mod[0:-1]) - - common_body.append('if {}'.format(compile_s_expr(cond, imm_map, None)) + ' {') - - # Emit the swaps. We use a temp, and wrap in a block to avoid naming - # collisions with multiple swaps. {{Doubling}} to escape the format. - - for v in vars_to_swap: - common_body.append(' {{ unsigned temp = {}{}; {}{} = {}{}; {}{} = temp; }}'.format(v, src_a, v, src_a, v, src_b, v, src_b)) - - # Also, remap. Bidrectional swaps are explicit in the XML. - for v in remap: - maps = remap[v] - imm = imm_map[v] - - for i, l in enumerate(maps): - common_body.append(' {}if ({} == {}) {} = {};'.format('' if i == 0 else 'else ', v, imm[l], v, imm[maps[l]])) - - common_body.append('}') - common_body.append('') - - for (name, pos, width) in st[1].get("immediates", []): - if name not in IMMEDIATE_TABLE: - return None - - common_body.append('unsigned {} = {};'.format(name, IMMEDIATE_TABLE[name])) - - for st in pack_exprs: - st.append('({} << {})'.format(name, pos)) - - if staging == 'r': - common_body.append('bi_read_staging_register(clause, ins);') - elif staging == 'w': - common_body.append('bi_write_staging_register(clause, ins);') - elif staging == '': - pass - else: - assert staging == 'rw' - # XXX: register allocation requirement (!) - common_body.append('bi_read_staging_register(clause, ins);') - common_body.append('assert(ins->src[0] == ins->dest);') - - # After this, we have to branch off, since deriveds *do* vary based on state. - state_body = [[] for s in states] - - for i, (_, st) in enumerate(states): - for ((pos, width), exprs) in st.get("derived", []): - pack_derived(pos, exprs, imm_map, state_body[i], pack_exprs[i]) - - # How do we pick a state? Accumulate the conditions - state_conds = [compile_s_expr(st[0], imm_map, None) for st in states] if len(states) > 1 else [None] - - if state_conds == None: - assert (states[0][0] == None) - - # Finally, we'll collect everything together - return variant_template.render(name = opname_to_c(opname), states = zip(pack_exprs, state_body, state_conds), common_body = common_body, single_state = (len(states) == 1)) - -HEADER = COPYRIGHT + """#ifndef _BI_GENERATED_PACK_H -#define _BI_GENERATED_PACK_H - -#include "compiler.h" -#include "bi_pack_helpers.h" -""" - -print(HEADER) - -packs = [pack_variant(e, instructions[e]) for e in instructions] -for p in packs: - print(p) - -print("#endif") diff --git a/src/panfrost/bifrost/meson.build b/src/panfrost/bifrost/meson.build index 5bd3eb7d657..4e0f2b91613 100644 --- a/src/panfrost/bifrost/meson.build +++ b/src/panfrost/bifrost/meson.build @@ -81,20 +81,6 @@ bi_packer_c = custom_target( depend_files : files('bifrost_isa.py'), ) -bi_generated_pack_h = custom_target( - 'bi_generated_pack.h', - input : ['gen_pack.py', 'ISA.xml'], - output : 'bi_generated_pack.h', - command : [prog_python, '@INPUT@'], - capture : true, - depend_files : files('bifrost_isa.py'), -) - -idep_bi_generated_pack_h = declare_dependency( - sources : [bi_generated_pack_h], - include_directories : include_directories('.'), -) - bi_opcodes_h = custom_target( 'bi_opcodes.h', input : ['bi_opcodes.h.py', 'ISA.xml'], @@ -138,7 +124,7 @@ libpanfrost_bifrost = static_library( 'panfrost_bifrost', [libpanfrost_bifrost_files, bifrost_nir_algebraic_c, bi_opcodes_c, bi_printer_c, bi_packer_c], include_directories : [inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_gallium_aux, inc_panfrost_hw], - dependencies: [idep_nir, idep_bi_generated_pack_h, idep_bi_opcodes_h, idep_bi_builder_h], + dependencies: [idep_nir, idep_bi_opcodes_h, idep_bi_builder_h], link_with: [libpanfrost_util, libpanfrost_bifrost_disasm], c_args : [no_override_init_args], gnu_symbol_visibility : 'hidden',