panfrost/midgard: Remove assembler

This code is outdated and unused; now that the compiler is mature,
there's no point keeping it around in-tree (or at all).

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
This commit is contained in:
Alyssa Rosenzweig 2019-04-24 02:43:23 +00:00
parent 2cd1aa3429
commit e4ec814c39
1 changed files with 0 additions and 643 deletions

View File

@ -1,643 +0,0 @@
"""
Copyright (C) 2018 Alyssa Rosenzweig
Copyright (c) 2013 Connor Abbott (connor@abbott.cx)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
"""
import sys
import pprint
import struct
# Parsed input: one (instruction, [arguments]) tuple per source line, filled
# in where the input file is read further down.
program = []
# Definitions from cwabbott's tools
# ALU opcodes keyed by mnemonic. The mnemonic is the second dot-separated
# field of an instruction name; the value is OR-ed into the encoded ALU
# instruction word at bit 0.
t6xx_alu_ops = {
"fadd": 0x10,
"fmul": 0x14,
"fmin": 0x28,
"fmax": 0x2C,
"fmov": 0x30,
"ffloor": 0x36,
"fceil": 0x37,
"fdot3": 0x3C,
"fdot3r": 0x3D,
"fdot4": 0x3E,
"freduce": 0x3F,
"iadd": 0x40,
"isub": 0x46,
"imul": 0x58,
"imov": 0x7B,
"feq": 0x80,
"fne": 0x81,
"flt": 0x82,
"fle": 0x83,
"f2i": 0x99,
"f2u8": 0x9C,
"u2f": 0xBC,
"ieq": 0xA0,
"ine": 0xA1,
"ilt": 0xA4,
"ile": 0xA5,
"iand": 0x70,
"ior": 0x71,
"inot": 0x72,
"iandnot": 0x74,
"ixor": 0x76,
"ball": 0xA9,
"bany": 0xB1,
"i2f": 0xB8,
"csel": 0xC5,
"fatan_pt2": 0xE8,
"frcp": 0xF0,
"frsqrt": 0xF2,
"fsqrt": 0xF3,
"fexp2": 0xF4,
"flog2": 0xF5,
"fsin": 0xF6,
"fcos": 0xF7,
"fatan2_pt1": 0xF9,
}
# Per-unit bit number. The emitter sets bit (1 << value) in the 32-bit ALU
# bundle tag for every unit used, and only merges consecutive ALU records
# into one bundle while these values are strictly ascending. "constants" is
# used for that ordering only; it never sets a tag bit.
t6xx_alu_bits = {
"vmul": 17,
"sadd": 19,
"vadd": 21,
"smul": 23,
"lut": 25,
"br": 26,
"branch": 27,
"constants": 32
}
# Width, in bits, of the instruction word emitted for each unit (divided by
# 8 to get a byte count when the bundle is written).
t6xx_alu_size_bits = {
"vmul": 48,
"sadd": 32,
"vadd": 48,
"smul": 32,
"lut": 48,
"br": 16,
"branch": 48
}
# Output modifier codes, selected by the third dot-separated field of the
# instruction name; names not listed here encode as 0.
t6xx_outmod = {
"none": 0,
"pos": 1,
"int": 2,
"sat": 3
}
# Register mode codes; used for the io_mode field of vector ALU words.
t6xx_reg_mode = {
"quarter": 0,
"half": 1,
"full": 2,
"double": 3
}
# NOTE(review): not referenced by name anywhere in this file; the vector
# encoder writes the literals 0 and 2 into its output_override field, which
# presumably correspond to "lower" and "none" here -- confirm.
t6xx_dest_override = {
"lower": 0,
"upper": 1,
"none": 2
}
# Load/store opcodes, matched against the complete instruction name.
# "ld_st_noop" fills the unused slot when a load/store bundle holds only one
# real operation.
t6xx_load_store_ops = {
"ld_st_noop": 0x03,
"ld_attr_16": 0x95,
"ld_attr_32": 0x94,
"ld_vary_16": 0x99,
"ld_vary_32": 0x98,
"ld_uniform_16": 0xAC,
"ld_uniform_32": 0xB0,
"st_vary_16": 0xD5,
"st_vary_32": 0xD4,
"ld_color_buffer_8": 0xBA
}
# Bundle type tags, stored in the low bits of each emitted bundle. For the
# "aluN" entries N is the bundle size in 32-bit words.
t6xx_tag = {
"texture": 0x3,
"load_store": 0x5,
"alu4": 0x8,
"alu8": 0x9,
"alu12": 0xA,
"alu16": 0xB,
}
def is_tag_alu(tag):
    """Return True when `tag` falls in the ALU bundle range (alu4..alu16)."""
    lowest = t6xx_tag["alu4"]
    highest = t6xx_tag["alu16"]
    return lowest <= tag <= highest
# Just an enum: the first element of every record on instruction_stream,
# telling the emitter which kind of bundle the record belongs in.
ALU = 0
LDST = 1
TEXTURE = 2
# Constant types supported, mapping the constant prefix to the Python format
# string and the coercion function
# (the prefix is the first character of an "<prefix>constants" instruction;
# the format string is handed to struct.pack)
constant_types = {
"f": ("f", float),
"h": ("e", float),
"i": ("i", int),
"s": ("h", int)
}
# Compact branch opcodes, keyed by the second dot-separated field of a
# "br.*" instruction name.
compact_branch_op = {
"jump": 1,
"branch": 2,
"discard": 4,
"write": 7
}
# Branch condition codes, keyed by the third dot-separated field of a
# "br.*" instruction name.
branch_condition = {
"false": 1,
"true": 2,
"always": 3,
}
# TODO: What else?
# Texture opcodes, keyed by the second field of a "texture.*" name.
texture_op = {
"normal": 0x11,
"texelfetch": 0x14
}
# Texture format codes, keyed by the third field of a "texture.*" name.
texture_fmt = {
"2d": 0x02,
"3d": 0x03
}
# Read the assembly source named by the first command-line argument. Each
# line is an instruction name, a space, then a comma-separated argument list.
with open(sys.argv[1], "r") as f:
    for ln in f:
        # Everything before the first space is the instruction name; the
        # remainder of the (stripped) line holds the arguments.
        instruction, _, rest = ln.strip().partition(" ")
        arguments = [piece.strip() for piece in rest.split(",")]
        program.append((instruction, arguments))
# Component letter -> component index, shared by the swizzle and write-mask
# decoders below.
swizzle_component = {
"x": 0,
"y": 1,
"z": 2,
"w": 3
}
def decode_reg_name(reg_name):
    """Parse a bare register name into ``(ireg, half, upper)``.

    Three spellings are understood:

    * ``"rN"``  -- full register N.
    * a name starting with ``"h"`` whose number begins at the third
      character (e.g. ``"hr5"``) -- half register; the number is split into
      the containing full register (``N >> 1``) and which half (``N & 1``).
    * ``"N"``   -- a plain integer, the special case used for load/store
      addresses.

    Returns:
        ``ireg`` (int) full register index, ``half`` (bool) whether a half
        register was named, ``upper`` (bool) whether it is the upper half.

    Raises:
        ValueError: if the name is empty or its numeric part is not an
            integer.
    """
    if not reg_name:
        # Fail with a meaningful message instead of an IndexError on [0]
        raise ValueError("empty register name")

    prefix = reg_name[0]

    if prefix == 'r':
        return (int(reg_name[1:]), False, False)

    if prefix == 'h':
        rreg = int(reg_name[2:])
        # Decode half-register into its full register's half. `upper` is
        # coerced to bool so every path returns the same types; True/False
        # still behave as 1/0 in the shifts done by callers.
        return (rreg >> 1, True, bool(rreg & 1))

    # Special case for load/store addresses
    return (int(reg_name), False, False)
def standard_swizzle_from_parts(swizzle_parts):
    """Pack a swizzle into an integer, two bits per lane.

    `swizzle_parts` is an operand already split on "."; element 1, when
    present, is the swizzle string, otherwise the identity "xyzw" is used.
    Lane i of the swizzle lands in bits [2i, 2i+1] of the result.
    """
    letters = "xyzw" if len(swizzle_parts) <= 1 else swizzle_parts[1]

    packed = 0
    for lane, letter in enumerate(letters):
        packed |= swizzle_component[letter] << (lane * 2)

    return packed
def mask_from_parts(mask_parts, large_mask):
    """Build a write mask from an operand already split on ".".

    Element 1 of `mask_parts`, when present, lists the enabled components;
    otherwise all of "xyzw" are enabled. With `large_mask` each enabled
    component contributes two set bits (0b11 at bit 2*index); without it,
    one bit at bit `index`.

    Returns a `(mask, mask_string)` tuple.
    """
    mask_s = mask_parts[1] if len(mask_parts) > 1 else "xyzw"

    if large_mask:
        lane_bits, lane_width = 3, 2
    else:
        lane_bits, lane_width = 1, 1

    mask = 0
    for c in "xyzw":
        if c in mask_s:
            mask += lane_bits << (lane_width * swizzle_component[c])

    return (mask, mask_s)
def decode_reg(reg):
    """Decode a source operand.

    Returns `(immediate, value, swizzle, half, upper, modifiers)`. For an
    immediate ("#<float>") `value` is the 16-bit pattern of the number as a
    half float and the remaining fields are 0. Otherwise `value` is the
    register index and `modifiers` has bit 1 set for a leading "-" and bit 0
    set for an "abs(" wrapper.
    """
    if reg[0] == "#":
        # Not actually a register, instead an immediate float: round-trip
        # through struct to obtain the raw half-float bits
        half_bits = struct.pack("e", float(reg[1:]))
        (raw,) = struct.unpack("H", half_bits)
        return (True, raw, 0, 0, 0, 0)

    # Function call syntax used in abs() modifier: drop the closing paren
    if reg[-1] == ')':
        reg = reg[:-1]

    swizzle_parts = reg.split(".")
    name = swizzle_parts[0]
    modifiers = 0

    if name[0] == '-':
        modifiers |= 2
        name = name[1:]

    if name[0] == 'a':
        modifiers |= 1
        name = name[len("abs("):]

    ireg, half, upper = decode_reg_name(name)
    swizzle = standard_swizzle_from_parts(swizzle_parts)
    return (False, ireg, swizzle, half, upper, modifiers)
def decode_masked_reg(reg, large_mask):
    """Decode an operand of the form "<register>[.<mask>]".

    Returns `(ireg, mask, component, half, upper)`, where `component` is the
    highest component index enabled in the mask (0 when none is).
    `large_mask` is forwarded to mask_from_parts.
    """
    mask_parts = reg.split(".")
    (ireg, half, upper) = decode_reg_name(mask_parts[0])
    (mask, mask_s) = mask_from_parts(mask_parts, large_mask)

    component = 0
    for c in "xyzw":
        if c in mask_s:
            component = max(component, swizzle_component[c])

    return (ireg, mask, component, half, upper)
# TODO: Fill these in XXX
# Texture pipeline registers in r28-r29
# Subtracted from a register number to get the select value encoded in
# texture instructions.
TEXTURE_BASE = 28
def decode_texture_reg_number(reg):
    """Decode the register part of a texture operand.

    Returns `(full, select, upper)`: whether a full register was named, its
    index relative to TEXTURE_BASE, and -- for half registers -- which half.
    Anything after the first "." is ignored.
    """
    name, _, _ = reg.partition(".")

    if name[0] != "r":
        # Half register: the number starts at the third character
        no = int(name[2:])
        return (False, (no >> 1) - TEXTURE_BASE, no & 1)

    return (True, int(name[1:]) - TEXTURE_BASE, 0)
def decode_texture_reg(reg):
    """Decode a texture input operand of the form "<register>.<c0><c1>".

    Returns `(full, select, upper, swizzleR, swizzleL)`, where swizzleL comes
    from the first swizzle letter and swizzleR from the second.
    """
    (full, select, upper) = decode_texture_reg_number(reg)

    # Swizzle mandatory for texture registers, afaict
    fields = reg.split(".")
    letters = fields[1]
    left = swizzle_component[letters[0]]
    right = swizzle_component[letters[1]]

    return (full, select, upper, right, left)
def decode_texture_out_reg(reg):
    """Decode a texture output operand into `(full, select, upper, mask)`.

    The mask is the small (one bit per component) write mask.
    """
    number = decode_texture_reg_number(reg)
    mask = mask_from_parts(reg.split("."), False)[0]
    return number + (mask,)
# Translate every parsed line into an intermediate record appended to
# instruction_stream. Record shapes:
#   (LDST, word)
#   (TEXTURE, word)
#   (ALU, unit, register_word, instruction_word)  -- register_word may be None
#   (ALU, "constants", bytearray)
# Lines matching none of the recognised forms are silently skipped.
instruction_stream = []

for p in program:
    ins = p[0]
    arguments = p[1]

    # Instruction names are dot-separated: "<unit-or-family>.<op>.<outmod>...".
    # Appending extra dots lets the optional fields be indexed safely.
    family = ins_mod = ins.split(".")[0]
    ins_op = (ins + ".").split(".")[1]
    ins_outmod = (ins + "." + ".").split(".")[2]

    # A missing or unrecognised output modifier encodes as 0
    out_mod = t6xx_outmod.get(ins_outmod, 0)

    if ins in t6xx_load_store_ops:
        # Arguments: masked register, address operand, and a hexadecimal
        # field whose meaning is not yet understood.
        op = t6xx_load_store_ops[ins]
        (reg, mask, component, half, upper) = decode_masked_reg(p[1][0], False)
        (immediate, address, swizzle, half, upper, modifiers) = decode_reg(p[1][1])
        unknown = int(p[1][2], 16)

        b = (
            (op << 0) | (reg << 8) | (mask << 13) | (swizzle << 17)
            | (unknown << 25) | (address << 51)
        )
        instruction_stream += [(LDST, b)]

    elif ins_op in t6xx_alu_ops:
        op = t6xx_alu_ops[ins_op]
        (reg_out, mask, out_component, half0, upper0) = decode_masked_reg(p[1][0], True)
        (_, reg_in1, swizzle1, half1, upper1, mod1) = decode_reg(p[1][1])
        (immediate, reg_in2, swizzle2, half2, upper2, mod2) = decode_reg(p[1][2])

        if immediate:
            # The top 5 bits of the 16-bit inline constant take the place of
            # the second input register; bit 15 flags the immediate.
            register_word = (reg_in1 << 0) | ((reg_in2 >> 11) << 5) | (reg_out << 10) | (1 << 15)
        else:
            register_word = (reg_in1 << 0) | (reg_in2 << 5) | (reg_out << 10)

        if ins_mod in ["vadd", "vmul", "lut"]:
            io_mode = t6xx_reg_mode["half" if half0 else "full"]
            repsel = 0
            i1half = half1
            i2block = 0
            output_override = 2  # NORMAL, TODO
            wr_mask = 0

            if ins_outmod == "quarter":
                io_mode = t6xx_reg_mode["quarter"]

            if half0:
                # TODO: half actually
                repsel = 2 * upper1
            else:
                repsel = upper1

            if half0:
                # Rare case... half destinations use the small
                # (one bit per component) mask instead of the large one
                (_, halfmask, _, _, _) = decode_masked_reg(p[1][0], False)
                wr_mask = halfmask
            else:
                wr_mask = mask

            if immediate:
                # Inline constant: lower 11 bits
                i2block = ((reg_in2 & 0xFF) << 3) | ((reg_in2 >> 8) & 0x7)
            else:
                if half0:
                    # TODO: replicate input 2 if half
                    pass
                else:
                    # TODO: half selection
                    i2block = upper2 | (half2 << 2)

                # NOTE(review): the swizzle is applied to every non-immediate
                # second operand, half destination or not -- confirm.
                i2block |= swizzle2 << 3

            # Extra modifier (fourth name field) for some special cased stuff
            name_fields = ins.split(".")
            special = name_fields[3] if len(name_fields) > 3 else None

            if special == "low":
                output_override = 0  # low
            elif special == "fulllow":
                # TODO: Not really a special case, just a bug?
                io_mode = t6xx_reg_mode["full"]
                output_override = 0  # low
                wr_mask = 0xFF

            instruction_word = (
                (op << 0) | (io_mode << 8) | (mod1 << 10) | (repsel << 12)
                | (i1half << 14) | (swizzle1 << 15) | (mod2 << 23)
                | (i2block << 25) | (output_override << 36)
                | (out_mod << 38) | (wr_mask << 40)
            )

        elif ins_mod in ["sadd", "smul"]:
            # TODO: What are these?
            unknown2 = 0
            unknown3 = 0

            if half1:
                i1comp_block = swizzle1 | (upper1 << 2)
            else:
                i1comp_block = swizzle1 << 1

            if immediate:
                # Inline constant is splattered in a... bizarre way
                i2block = (
                    (((reg_in2 >> 9) & 3) << 0)
                    | (((reg_in2 >> 8) & 1) << 2)
                    | (((reg_in2 >> 5) & 7) << 3)
                    | (((reg_in2 >> 0) & 15) << 6)
                )
            else:
                # TODO: half register
                swizzle2 = (swizzle2 << 1) & 0x1F
                i2block = (mod2 << 0) | ((not half2) << 2) | (swizzle2 << 3) | (unknown2 << 5)

            # TODO: half register output component
            outcomp_block = out_component << 1

            instruction_word = (
                (op << 0) | (mod1 << 8) | ((not half1) << 10)
                | (i1comp_block << 11) | (i2block << 14) | (unknown3 << 25)
                | (out_mod << 26) | ((not half0) << 28) | (outcomp_block << 29)
            )

        else:
            # Any other unit: emit the bare opcode
            instruction_word = op

        instruction_stream += [(ALU, ins_mod, register_word, instruction_word)]

    elif family == "texture":
        # Texture ops use long series of modifiers to describe their needed
        # capabilities, separated by dots. Decode them here
        parts = ins.split(".")

        # First few modifiers are fixed, like an instruction name
        tex_op = parts[1]
        tex_fmt = parts[2]

        # The remaining are variable, but strictly ordered
        parts = parts[3:]

        op = texture_op[tex_op]

        # Some bits are defined directly in the modifier list
        shadow = "shadow" in parts
        cont = "cont" in parts
        last = "last" in parts
        has_filter = "raw" not in parts

        # The remaining need order preserved since they have their own
        # arguments; those arguments start at position 4 of the operand list
        argument_parts = [part for part in parts if part not in ["shadow", "cont", "last", "raw"]]

        bias_lod = 0

        for argument, part in zip(argument_parts, arguments[4:]):
            if argument == "bias":
                bias_lod = int(float(part) * 256)
            else:
                print("Unknown argument: " + str(argument))

        fmt = texture_fmt[tex_fmt]
        has_offset = 0

        magic1 = 1  # IDEK
        magic2 = 2  # Where did this even come from?!

        # Operands: output register, "texture<N>", "sampler<N>[.swizzle]",
        # input coordinate register
        texture_handle = int(arguments[1][len("texture"):])

        sampler_parts = arguments[2].split(".")
        sampler_handle = int(sampler_parts[0][len("sampler"):])
        swizzle0 = standard_swizzle_from_parts(sampler_parts)

        (full0, select0, upper0, mask0) = decode_texture_out_reg(arguments[0])
        (full1, select1, upper1, swizzleR1, swizzleL1) = decode_texture_reg(arguments[3])

        tex = (
            (op << 0) | (shadow << 6) | (cont << 8) | (last << 9)
            | (fmt << 10) | (has_offset << 15) | (has_filter << 16)
            | (select1 << 17) | (upper1 << 18) | (swizzleL1 << 19)
            | (swizzleR1 << 21) | (0 << 23) | (magic2 << 25)
            | (full0 << 29) | (magic1 << 30) | (select0 << 32)
            | (upper0 << 33) | (mask0 << 34) | (swizzle0 << 40)
            | (bias_lod << 72) | (texture_handle << 88)
            | (sampler_handle << 104)
        )

        instruction_stream += [(TEXTURE, tex)]

    elif family == "br":
        # Name is "br.<op>.<condition>"; the single operand is
        # "<offset>-><destination tag>"
        cond = ins.split(".")[2]
        condition = branch_condition[cond]

        bop = compact_branch_op[ins_op]

        offset = int(arguments[0].split("->")[0])

        # 2's complement and chill (7-bit field)
        if offset < 0:
            offset = (1 << 7) - abs(offset)

        # Find where we're going
        dest_tag = int(arguments[0].split("->")[1])

        br = (bop << 0) | (dest_tag << 3) | (offset << 7) | (condition << 14)

        # TODO: Unconditional branch encoding

        # Branches carry no register word
        instruction_stream += [(ALU, "br", None, br)]

    elif ins[1:] == "constants":
        if ins[0] not in constant_types:
            # Report the offending prefix, then stop assembling
            print("Unknown constant type " + str(ins[0]))
            break

        (fmt, cast) = constant_types[ins[0]]

        encoded = [struct.pack(fmt, cast(f)) for f in p[1]]

        consts = bytearray(b"".join(encoded))

        # consts must be exactly 4 quadwords, so pad with zeroes if necessary
        consts += bytes(4*4 - len(consts))

        instruction_stream += [(ALU, "constants", consts)]
# Emit from instruction stream: pack the intermediate records into bundles,
# one bytearray per bundle, collected in `instructions`.
instructions = []
index = 0

while index < len(instruction_stream):
    output_stream = bytearray()
    ins = instruction_stream[index]
    tag = ins[0]

    if tag == LDST:
        # A load/store bundle has two slots. Pair this op with the next one
        # when that is also a load/store; otherwise put a no-op in the first
        # slot and this op in the second.
        parta = ins[1]
        has_next = index + 1 < len(instruction_stream)

        if has_next and instruction_stream[index + 1][0] == LDST:
            partb = instruction_stream[index + 1][1]
            index += 1
        else:
            partb = parta
            parta = t6xx_load_store_ops["ld_st_noop"]

        word = (partb << 68) | (parta << 8) | t6xx_tag["load_store"]
        output_stream += word.to_bytes(16, "little")

    elif tag == TEXTURE:
        word = (ins[1] << 8) | t6xx_tag["texture"]
        output_stream += word.to_bytes(16, "little")

    elif tag == ALU:
        # TODO: Combining ALU ops
        emit_size = 4  # 32-bit tag always emitted
        tag = 0
        register_words = bytearray()
        body_words = bytearray()
        constant_words = None
        last_alu_bit = 0

        # Iterate through while there are ALU tags in strictly ascending order
        while (index < len(instruction_stream)
               and instruction_stream[index][0] == ALU
               and t6xx_alu_bits[instruction_stream[index][1]] > last_alu_bit):
            ins = instruction_stream[index]
            unit = ins[1]
            last_alu_bit = t6xx_alu_bits[unit]

            if unit == "constants":
                constant_words = ins[2]
            else:
                # Flag for the used part of the GPU
                tag |= 1 << last_alu_bit

                # 16-bit register word, if present
                if ins[2] is not None:
                    register_words += ins[2].to_bytes(2, "little")
                    emit_size += 2

                size = t6xx_alu_size_bits[unit] // 8
                body_words += ins[3].to_bytes(size, "little")
                emit_size += size

            index += 1

        index -= 1  # fix off by one, from later loop increment

        # Pad to nearest multiple of 4 words (16 bytes)
        padding = -emit_size % 16
        emit_size += padding

        # emit_size includes constants
        if constant_words:
            emit_size += len(constant_words)

        # Calculate tag given size
        words = emit_size >> 2
        tag |= t6xx_tag["alu" + str(words)]

        # Actually emit, now that we can
        output_stream += tag.to_bytes(4, "little")
        output_stream += register_words
        output_stream += body_words
        output_stream += bytes(padding)

        if constant_words:
            output_stream += constant_words

    instructions.append(output_stream)
    index += 1
# Assembly over; just emit tags at this point. The upper nibble of each
# bundle's first byte is set from the bundle that follows it.
binary = bytearray()
bundle_count = len(instructions)

for idx, ins in enumerate(instructions):
    if idx + 1 >= bundle_count:
        # Instruction stream over
        tag = 1
    else:
        # Instruction prefetch: low nibble of the next bundle's first byte
        tag = instructions[idx + 1][0] & 0xF

        # Check for ALU special case: an ALU bundle that is the last in the
        # program is announced with tag 1 instead
        if is_tag_alu(tag) and idx + 2 == bundle_count:
            tag = 1

    ins[0] |= tag << 4
    binary += ins

pprint.pprint(program)

with open(sys.argv[2], "wb") as f:
    f.write(binary)