/*
 * Copyright (C) 2021 Alyssa Rosenzweig
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "agx_compiler.h"

/* Binary patches needed for branch offsets */
struct agx_branch_fixup {
   /* Offset into the binary to patch */
   off_t offset;

   /* Value to patch with will be block->offset */
   agx_block *block;
};

/* Texturing has its own operands */

static unsigned
agx_pack_sample_coords(agx_index index, bool *flag)
{
   /* TODO: how to encode 16-bit coords? */
   assert(index.size == AGX_SIZE_32);
   assert(index.value < 0x100);

   *flag = index.discard;
   return index.value;
}

static unsigned
agx_pack_texture(agx_index index, unsigned *flag)
{
   /* TODO: indirection */
   assert(index.type == AGX_INDEX_IMMEDIATE);
   *flag = 0;
   return index.value;
}

static unsigned
agx_pack_sampler(agx_index index, bool *flag)
{
   /* TODO: indirection */
   assert(index.type == AGX_INDEX_IMMEDIATE);
   *flag = 0;
   return index.value;
}

static unsigned
agx_pack_sample_offset(agx_index index, bool *flag)
{
   /* TODO: offsets */
   assert(index.type == AGX_INDEX_NULL);
   *flag = 0;
   return 0;
}

static unsigned
agx_pack_lod(agx_index index)
{
   /* Immediate zero */
   if (index.type == AGX_INDEX_IMMEDIATE && index.value == 0)
      return 0;

   /* Otherwise must be a 16-bit register holding the fp16 LOD */
   assert(index.type == AGX_INDEX_REGISTER);
   assert(index.size == AGX_SIZE_16);
   assert(index.value < 0x100);

   return index.value;
}

/* Load/stores have their own operands */

static unsigned
agx_pack_memory_reg(agx_index index, bool *flag)
{
   assert(index.size == AGX_SIZE_16 || index.size == AGX_SIZE_32);
   assert(index.size == AGX_SIZE_16 || (index.value & 1) == 0);
   assert(index.value < 0x100);

   *flag = (index.size == AGX_SIZE_32);
   return index.value;
}

static unsigned
agx_pack_memory_base(agx_index index, bool *flag)
{
   assert(index.size == AGX_SIZE_64);
   assert((index.value & 1) == 0);

   if (index.type == AGX_INDEX_UNIFORM) {
      assert(index.value < 0x200);
      *flag = 1;
      return index.value;
   } else {
      assert(index.value < 0x100);
      *flag = 0;
      return index.value;
   }
}

static unsigned
agx_pack_memory_index(agx_index index, bool *flag)
{
   if (index.type == AGX_INDEX_IMMEDIATE) {
      assert(index.value < 0x10000);
      *flag = 1;
      return index.value;
   } else {
      assert(index.type == AGX_INDEX_REGISTER);
      assert((index.value & 1) == 0);
      assert(index.value < 0x100);
      *flag = 0;
      return index.value;
   }
}
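/*
 * Worked example (illustrative values, not taken from hardware docs): a
 * 64-bit base pointer in uniform 6 packs as 6 with its flag set,
 * distinguishing it from general register 6; an immediate index of 0x1234
 * packs verbatim with the index flag set, while a register index r8 packs
 * as 8 with the flag clear. The out-of-band flag bits are how the hardware
 * tells the two operand kinds apart within the same field.
 */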
/* ALU goes through a common path */

static unsigned
agx_pack_alu_dst(agx_index dest)
{
   assert(dest.type == AGX_INDEX_REGISTER);
   unsigned reg = dest.value;
   enum agx_size size = dest.size;
   assert(reg < 0x100);

   /* RA invariant: alignment of half-reg */
   if (size >= AGX_SIZE_32)
      assert((reg & 1) == 0);

   return
      (dest.cache ? (1 << 0) : 0) |
      ((size >= AGX_SIZE_32) ? (1 << 1) : 0) |
      ((size == AGX_SIZE_64) ? (1 << 2) : 0) |
      ((reg << 2));
}

static unsigned
agx_pack_alu_src(agx_index src)
{
   unsigned value = src.value;
   enum agx_size size = src.size;

   if (src.type == AGX_INDEX_IMMEDIATE) {
      /* Flags 0 for an 8-bit immediate */
      assert(value < 0x100);

      return
         (value & BITFIELD_MASK(6)) |
         ((value >> 6) << 10);
   } else if (src.type == AGX_INDEX_UNIFORM) {
      assert(size == AGX_SIZE_16 || size == AGX_SIZE_32);
      assert(value < 0x200);

      return
         (value & BITFIELD_MASK(6)) |
         ((value >> 8) << 6) |
         ((size == AGX_SIZE_32) ? (1 << 7) : 0) |
         (0x1 << 8) |
         (((value >> 6) & BITFIELD_MASK(2)) << 10);
   } else {
      assert(src.type == AGX_INDEX_REGISTER);
      assert(!(src.cache && src.discard));

      unsigned hint = src.discard ? 0x3 : src.cache ? 0x2 : 0x1;
      unsigned size_flag =
         (size == AGX_SIZE_64) ? 0x3 :
         (size == AGX_SIZE_32) ? 0x2 :
         (size == AGX_SIZE_16) ? 0x0 : 0x0;

      return
         (value & BITFIELD_MASK(6)) |
         (hint << 6) |
         (size_flag << 8) |
         (((value >> 6) & BITFIELD_MASK(2)) << 10);
   }
}

static unsigned
agx_pack_cmpsel_src(agx_index src, enum agx_size dest_size)
{
   unsigned value = src.value;
   ASSERTED enum agx_size size = src.size;

   if (src.type == AGX_INDEX_IMMEDIATE) {
      /* Flags 0x4 for an 8-bit immediate */
      assert(value < 0x100);

      return
         (value & BITFIELD_MASK(6)) |
         (0x4 << 6) |
         ((value >> 6) << 10);
   } else if (src.type == AGX_INDEX_UNIFORM) {
      assert(size == AGX_SIZE_16 || size == AGX_SIZE_32);
      assert(size == dest_size);
      assert(value < 0x200);

      return
         (value & BITFIELD_MASK(6)) |
         ((value >> 8) << 6) |
         (0x3 << 7) |
         (((value >> 6) & BITFIELD_MASK(2)) << 10);
   } else {
      assert(src.type == AGX_INDEX_REGISTER);
      assert(!(src.cache && src.discard));
      assert(size == AGX_SIZE_16 || size == AGX_SIZE_32);
      assert(size == dest_size);

      unsigned hint = src.discard ? 0x3 : src.cache ? 0x2 : 0x1;

      return
         (value & BITFIELD_MASK(6)) |
         (hint << 6) |
         (((value >> 6) & BITFIELD_MASK(2)) << 10);
   }
}

static unsigned
agx_pack_sample_mask_src(agx_index src)
{
   unsigned value = src.value;
   unsigned packed_value =
      (value & BITFIELD_MASK(6)) |
      (((value >> 6) & BITFIELD_MASK(2)) << 10);

   if (src.type == AGX_INDEX_IMMEDIATE) {
      assert(value < 0x100);
      return packed_value | (1 << 7);
   } else {
      assert(src.type == AGX_INDEX_REGISTER);
      assert(!(src.cache && src.discard));

      return packed_value;
   }
}

static unsigned
agx_pack_float_mod(agx_index src)
{
   return (src.abs ? (1 << 0) : 0) |
          (src.neg ? (1 << 1) : 0);
}

static bool
agx_all_16(agx_instr *I)
{
   agx_foreach_dest(I, d) {
      if (!agx_is_null(I->dest[d]) && I->dest[d].size != AGX_SIZE_16)
         return false;
   }

   agx_foreach_src(I, s) {
      if (!agx_is_null(I->src[s]) && I->src[s].size != AGX_SIZE_16)
         return false;
   }

   return true;
}
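/*
 * Worked examples of the operand encodings above (derived by hand from the
 * packing code itself, so illustrative rather than authoritative):
 *
 *    agx_pack_alu_dst: 32-bit r4 with the cache bit set packs as
 *       (1 << 0) | (1 << 1) | (4 << 2) = 0x13
 *
 *    agx_pack_alu_src: 32-bit r8 with the discard hint packs as
 *       (8 & 0x3F) | (0x3 << 6) | (0x2 << 8) = 0x2C8
 *
 * Bits 10-11 of a packed source carry the high register bits; they are
 * split off into the extension word by agx_pack_alu below.
 */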
/* Generic pack for ALU instructions, which are quite regular */

static void
agx_pack_alu(struct util_dynarray *emission, agx_instr *I)
{
   struct agx_opcode_info info = agx_opcodes_info[I->op];
   bool is_16 = agx_all_16(I) && info.encoding_16.exact;
   struct agx_encoding encoding = is_16 ?
      info.encoding_16 : info.encoding;

   assert(encoding.exact && "invalid encoding");

   uint64_t raw = encoding.exact;
   uint16_t extend = 0;

   // TODO: assert saturable
   if (I->saturate)
      raw |= (1 << 6);

   if (info.nr_dests) {
      assert(info.nr_dests == 1);
      unsigned D = agx_pack_alu_dst(I->dest[0]);
      unsigned extend_offset = (sizeof(extend) * 8) - 4;

      raw |= (D & BITFIELD_MASK(8)) << 7;
      extend |= ((D >> 8) << extend_offset);
   } else if (info.immediates & AGX_IMMEDIATE_NEST) {
      raw |= (I->invert_cond << 8);
      raw |= (I->nest << 11);
      raw |= (I->icond << 13);
   }

   for (unsigned s = 0; s < info.nr_srcs; ++s) {
      bool is_cmpsel = (s >= 2) &&
         (I->op == AGX_OPCODE_ICMPSEL || I->op == AGX_OPCODE_FCMPSEL);

      unsigned src = is_cmpsel ?
         agx_pack_cmpsel_src(I->src[s], I->dest[0].size) :
         agx_pack_alu_src(I->src[s]);

      unsigned src_short = (src & BITFIELD_MASK(10));
      unsigned src_extend = (src >> 10);

      /* Size bit always zero and so omitted for 16-bit */
      if (is_16 && !is_cmpsel)
         assert((src_short & (1 << 9)) == 0);

      if (info.is_float) {
         unsigned fmod = agx_pack_float_mod(I->src[s]);
         unsigned fmod_offset = is_16 ? 9 : 10;
         src_short |= (fmod << fmod_offset);
      } else if (I->op == AGX_OPCODE_IMAD || I->op == AGX_OPCODE_IADD) {
         bool zext = I->src[s].abs;
         bool extends = I->src[s].size < AGX_SIZE_64;

         unsigned sxt = (extends && !zext) ? (1 << 10) : 0;

         assert(!I->src[s].neg || s == 1);
         src_short |= sxt;
      }

      /* Sources come at predictable offsets */
      unsigned offset = 16 + (12 * s);
      raw |= (((uint64_t) src_short) << offset);

      /* Destination and each source get extended in reverse order */
      unsigned extend_offset = (sizeof(extend) * 8) - ((s + 3) * 2);
      extend |= (src_extend << extend_offset);
   }

   if ((I->op == AGX_OPCODE_IMAD || I->op == AGX_OPCODE_IADD) && I->src[1].neg)
      raw |= (1 << 27);

   if (info.immediates & AGX_IMMEDIATE_TRUTH_TABLE) {
      raw |= (I->truth_table & 0x3) << 26;
      raw |= (uint64_t) (I->truth_table >> 2) << 38;
   } else if (info.immediates & AGX_IMMEDIATE_SHIFT) {
      raw |= (uint64_t) (I->shift & 1) << 39;
      raw |= (uint64_t) (I->shift >> 1) << 52;
   } else if (info.immediates & AGX_IMMEDIATE_BFI_MASK) {
      raw |= (uint64_t) (I->mask & 0x3) << 38;
      raw |= (uint64_t) ((I->mask >> 2) & 0x3) << 50;
      raw |= (uint64_t) ((I->mask >> 4) & 0x1) << 63;
   } else if (info.immediates & AGX_IMMEDIATE_SR) {
      raw |= (uint64_t) (I->sr & 0x3F) << 16;
      raw |= (uint64_t) (I->sr >> 6) << 26;
   } else if (info.immediates & AGX_IMMEDIATE_WRITEOUT)
      raw |= (uint64_t) (I->imm) << 8;
   else if (info.immediates & AGX_IMMEDIATE_IMM)
      raw |= (uint64_t) (I->imm) << 16;
   else if (info.immediates & AGX_IMMEDIATE_ROUND)
      raw |= (uint64_t) (I->imm) << 26;
   else if (info.immediates & (AGX_IMMEDIATE_FCOND | AGX_IMMEDIATE_ICOND))
      raw |= (uint64_t) (I->fcond) << 61;

   /* Determine length bit. The short mask covers the entire short encoding,
    * so the long form is needed exactly when bits fall outside it or when
    * extend bits are required. Note length is in bytes, hence 8x. */
   unsigned length = encoding.length_short;
   uint64_t short_mask = BITFIELD64_MASK(8 * length);
   bool length_bit = (extend || (raw & ~short_mask));

   if (encoding.extensible && length_bit) {
      raw |= (1 << 15);
      length += (length > 8) ? 4 : 2;
   }

   /* Pack! */
   if (length <= sizeof(uint64_t)) {
      unsigned extend_offset = ((length - sizeof(extend)) * 8);

      /* XXX: This is a weird special case */
      if (I->op == AGX_OPCODE_IADD)
         extend_offset -= 16;

      raw |= (uint64_t) extend << extend_offset;
      memcpy(util_dynarray_grow_bytes(emission, 1, length), &raw, length);
   } else {
      /* So far, >8 byte ALU is only to store the extend bits */
      unsigned extend_offset = (((length - sizeof(extend)) * 8) - 64);
      unsigned hi = ((uint64_t) extend) << extend_offset;

      memcpy(util_dynarray_grow_bytes(emission, 1, 8), &raw, 8);
      memcpy(util_dynarray_grow_bytes(emission, 1, length - 8), &hi, length - 8);
   }
}
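/*
 * Length example (hand-worked from the logic above, so illustrative only):
 * for an extensible opcode with a 6-byte short form, short_mask covers bits
 * 0-47. If the packed instruction sets any bit above 47, or needs nonzero
 * extend bits for high register numbers, bit 15 is set and the encoding
 * grows by 2 bytes to 8; only encodings already longer than 8 bytes grow
 * by 4.
 */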
static void
agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups,
               agx_instr *I)
{
   switch (I->op) {
   case AGX_OPCODE_LD_TILE:
   case AGX_OPCODE_ST_TILE:
   {
      bool load = (I->op == AGX_OPCODE_LD_TILE);
      unsigned D = agx_pack_alu_dst(load ? I->dest[0] : I->src[0]);
      unsigned rt = 0; /* TODO */
      unsigned mask = I->mask ?: 0xF;
      assert(mask < 0x10);

      uint64_t raw =
         0x09 |
         (load ? (1 << 6) : 0) |
         ((uint64_t) (D & BITFIELD_MASK(8)) << 7) |
         ((uint64_t) (I->format) << 24) |
         ((uint64_t) (rt) << 32) |
         (load ? (1ull << 35) : 0) |
         ((uint64_t) (mask) << 36) |
         ((uint64_t) 0x0380FC << 40) |
         (((uint64_t) (D >> 8)) << 60);

      unsigned size = 8;
      memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);
      break;
   }

   case AGX_OPCODE_SAMPLE_MASK:
   {
      unsigned S = agx_pack_sample_mask_src(I->src[0]);
      uint64_t raw =
         0x7fc1 |
         ((S & 0xff) << 16) |
         (0x3 << 24) |
         ((S >> 8) << 26) |
         (0x158ull << 32);

      unsigned size = 8;
      memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);
      break;
   }

   case AGX_OPCODE_LD_VARY:
   case AGX_OPCODE_LD_VARY_FLAT:
   {
      bool flat = (I->op == AGX_OPCODE_LD_VARY_FLAT);
      unsigned D = agx_pack_alu_dst(I->dest[0]);
      unsigned channels = (I->channels & 0x3);
      assert(I->mask < 0xF); /* 0 indicates full mask */

      agx_index index_src = I->src[0];
      assert(index_src.type == AGX_INDEX_IMMEDIATE);
      assert(!(flat && I->perspective));

      unsigned index = index_src.value;
      bool kill = false; // TODO: optimize

      uint64_t raw =
         0x21 |
         (flat ? (1 << 7) : 0) |
         (I->perspective ? (1 << 6) : 0) |
         ((D & 0xFF) << 7) |
         (1ull << 15) | /* XXX */
         (((uint64_t) index) << 16) |
         (((uint64_t) channels) << 30) |
         (!flat ? (1ull << 46) : 0) | /* XXX */
         (kill ? (1ull << 52) : 0) | /* XXX */
         (((uint64_t) (D >> 8)) << 56);

      unsigned size = 8;
      memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);
      break;
   }

   case AGX_OPCODE_ST_VARY:
   {
      agx_index index_src = I->src[0];
      agx_index value = I->src[1];

      assert(index_src.type == AGX_INDEX_IMMEDIATE);
      assert(value.type == AGX_INDEX_REGISTER);
      assert(value.size == AGX_SIZE_32);

      uint64_t raw =
         0x11 |
         (I->last ? (1 << 7) : 0) |
         ((value.value & 0x3F) << 9) |
         (((uint64_t) index_src.value) << 16) |
         (0x80 << 16) | /* XXX */
         ((value.value >> 6) << 24) |
         (0x8 << 28); /* XXX */

      unsigned size = 4;
      memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);
      break;
   }
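   /*
    * The memory and texture cases below split wide operand numbers across
    * discontiguous bit ranges. Illustrative example, worked by hand from
    * agx_pack_memory_reg and the device_load packing: a 32-bit destination
    * r10 packs as R = 10 with its size flag set; bits 0-5 of R land at bit
    * 10 of the word and the remaining high bits at bit 40.
    */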
   case AGX_OPCODE_DEVICE_LOAD:
   {
      assert(I->mask != 0);
      assert(I->format <= 0x10);

      bool Rt, At, Ot;
      unsigned R = agx_pack_memory_reg(I->dest[0], &Rt);
      unsigned A = agx_pack_memory_base(I->src[0], &At);
      unsigned O = agx_pack_memory_index(I->src[1], &Ot);
      unsigned u1 = 1; // XXX
      unsigned u3 = 0;
      unsigned u4 = 4; // XXX
      unsigned u5 = 0;
      bool L = true; /* TODO: when would you want short? */

      uint64_t raw =
         0x05 |
         ((I->format & BITFIELD_MASK(3)) << 7) |
         ((R & BITFIELD_MASK(6)) << 10) |
         ((A & BITFIELD_MASK(4)) << 16) |
         ((O & BITFIELD_MASK(4)) << 20) |
         (Ot ? (1 << 24) : 0) |
         (I->src[1].abs ? (1 << 25) : 0) |
         (u1 << 26) |
         (At << 27) |
         (u3 << 28) |
         (I->scoreboard << 30) |
         (((uint64_t) ((O >> 4) & BITFIELD_MASK(4))) << 32) |
         (((uint64_t) ((A >> 4) & BITFIELD_MASK(4))) << 36) |
         (((uint64_t) ((R >> 6) & BITFIELD_MASK(2))) << 40) |
         (((uint64_t) I->shift) << 42) |
         (((uint64_t) u4) << 44) |
         (L ? (1ull << 47) : 0) |
         (((uint64_t) (I->format >> 3)) << 48) |
         (((uint64_t) Rt) << 49) |
         (((uint64_t) u5) << 50) |
         (((uint64_t) I->mask) << 52) |
         (((uint64_t) (O >> 8)) << 56);

      unsigned size = L ? 8 : 6;
      memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);
      break;
   }

   case AGX_OPCODE_TEXTURE_SAMPLE:
   {
      assert(I->mask != 0);
      assert(I->format <= 0x10);

      bool Rt, Ot, Ct, St;
      unsigned Tt;

      unsigned R = agx_pack_memory_reg(I->dest[0], &Rt);
      unsigned C = agx_pack_sample_coords(I->src[0], &Ct);
      unsigned T = agx_pack_texture(I->src[2], &Tt);
      unsigned S = agx_pack_sampler(I->src[3], &St);
      unsigned O = agx_pack_sample_offset(I->src[4], &Ot);
      unsigned D = agx_pack_lod(I->src[1]);

      unsigned U = 0; // TODO: what is sampler ureg?
      unsigned q1 = 0; // XXX
      unsigned q2 = 0; // XXX
      unsigned q3 = 12; // XXX
      unsigned kill = 0; // helper invocation kill bit
      unsigned q5 = 0; // XXX
      unsigned q6 = 0; // XXX

      uint32_t extend =
         ((U & BITFIELD_MASK(5)) << 0) |
         (kill << 5) |
         ((R >> 6) << 8) |
         ((C >> 6) << 10) |
         ((D >> 6) << 12) |
         ((T >> 6) << 14) |
         ((O & BITFIELD_MASK(6)) << 16) |
         (q6 << 22) |
         (Ot << 27) |
         ((S >> 6) << 28) |
         ((O >> 6) << 30);

      bool L = (extend != 0);
      assert(I->scoreboard == 0 && "todo");

      uint64_t raw =
         0x31 |
         (Rt ? (1 << 8) : 0) |
         ((R & BITFIELD_MASK(6)) << 9) |
         (L ? (1 << 15) : 0) |
         ((C & BITFIELD_MASK(6)) << 16) |
         (Ct ? (1 << 22) : 0) |
         (q1 << 23) |
         ((D & BITFIELD_MASK(6)) << 24) |
         (q2 << 30) |
         (((uint64_t) (T & BITFIELD_MASK(6))) << 32) |
         (((uint64_t) Tt) << 38) |
         (((uint64_t) I->dim) << 40) |
         (((uint64_t) q3) << 43) |
         (((uint64_t) I->mask) << 48) |
         (((uint64_t) I->lod_mode) << 52) |
         (((uint64_t) (S & BITFIELD_MASK(6))) << 56) |
         (((uint64_t) St) << 62) |
         (((uint64_t) q5) << 63);

      memcpy(util_dynarray_grow_bytes(emission, 1, 8), &raw, 8);

      if (L)
         memcpy(util_dynarray_grow_bytes(emission, 1, 4), &extend, 4);

      break;
   }
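   /*
    * The jump cases below cannot know their final offsets yet, since a
    * branch may precede its target in emission order. Illustrative example:
    * a jump emitted at byte 0x20 whose target block ends up at byte 0x50
    * records a fixup, and agx_fixup_branch later writes the displacement
    * 0x50 - 0x20 = 0x30 two bytes into the jump.
    */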
   case AGX_OPCODE_JMP_EXEC_ANY:
   case AGX_OPCODE_JMP_EXEC_NONE:
   {
      /* We don't implement indirect branches */
      assert(I->target != NULL);

      /* We'll fix the offset later. */
      struct agx_branch_fixup fixup = {
         .block = I->target,
         .offset = emission->size
      };

      util_dynarray_append(fixups, struct agx_branch_fixup, fixup);

      /* The rest of the instruction is fixed */
      struct agx_opcode_info info = agx_opcodes_info[I->op];
      uint64_t raw = info.encoding.exact;
      memcpy(util_dynarray_grow_bytes(emission, 1, 6), &raw, 6);
      break;
   }

   default:
      agx_pack_alu(emission, I);
      return;
   }
}

/* Relative branches may be emitted before their targets, so we patch the
 * binary to fix up the branch offsets after the main emit */

static void
agx_fixup_branch(struct util_dynarray *emission, struct agx_branch_fixup fix)
{
   /* Branch offset is 2 bytes into the jump instruction */
   uint8_t *location = ((uint8_t *) emission->data) + fix.offset + 2;

   /* Offsets are relative to the jump instruction */
   int32_t patch = (int32_t) fix.block->offset - (int32_t) fix.offset;

   /* Patch the binary */
   memcpy(location, &patch, sizeof(patch));
}

void
agx_pack_binary(agx_context *ctx, struct util_dynarray *emission)
{
   struct util_dynarray fixups;
   util_dynarray_init(&fixups, ctx);

   agx_foreach_block(ctx, block) {
      /* Relative to the start of the binary, the block begins at the current
       * number of bytes emitted */
      block->offset = emission->size;

      agx_foreach_instr_in_block(block, ins) {
         agx_pack_instr(emission, &fixups, ins);
      }
   }

   util_dynarray_foreach(&fixups, struct agx_branch_fixup, fixup)
      agx_fixup_branch(emission, *fixup);

   util_dynarray_fini(&fixups);
}
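/*
 * Usage sketch (hypothetical caller, for illustration only; the real driver
 * entry point may differ): after compilation and register allocation, pack
 * the IR into a byte stream and hand the raw bytes to the uploader.
 *
 *    struct util_dynarray binary;
 *    util_dynarray_init(&binary, NULL);
 *    agx_pack_binary(ctx, &binary);
 *    upload_shader(binary.data, binary.size);  // hypothetical helper
 *    util_dynarray_fini(&binary);
 */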