panfrost: Implement Midgard shader toolchain

This patch implements the free Midgard shader toolchain: the assembler,
the disassembler, and the NIR-based compiler. The assembler is a
standalone inaccessible Python script for reference purposes. The
disassembler and the compiler are implemented in C, accessible via the
standalone `midgard_compiler` binary. Later patches will use these
interfaces from the driver for online compilation.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Acked-by: Jason Ekstrand <jason@jlekstrand.net>
Acked-by: Rob Clark <robdclark@gmail.com>
Acked-by: Eric Anholt <eric@anholt.net>
Acked-by: Emil Velikov <emil.velikov@collabora.com>
This commit is contained in:
Alyssa Rosenzweig 2019-01-30 01:11:31 +00:00
parent 61d3ae6e0b
commit e67e072637
13 changed files with 6383 additions and 2 deletions

View File

@ -23,6 +23,10 @@ files_panfrost = files(
'pan_public.h',
'pan_screen.c',
'pan_screen.h',
'midgard/midgard_compile.c',
'midgard/cppwrap.cpp',
'midgard/disassemble.c',
)
inc_panfrost = [
@ -32,12 +36,25 @@ inc_panfrost = [
inc_drm_uapi,
inc_include,
inc_src,
include_directories('include')
include_directories('include'),
include_directories('midgard'),
]
midgard_nir_algebraic_c = custom_target(
'midgard_nir_algebraic.c',
input : 'midgard/midgard_nir_algebraic.py',
output : 'midgard_nir_algebraic.c',
command : [
prog_python, '@INPUT@',
'-p', join_paths(meson.source_root(), 'src/compiler/nir/'),
],
capture : true,
depend_files : nir_algebraic_py,
)
libpanfrost = static_library(
'panfrost',
[files_panfrost],
[files_panfrost, midgard_nir_algebraic_c],
dependencies: [
dep_thread,
idep_nir
@ -50,3 +67,26 @@ driver_panfrost = declare_dependency(
compile_args : ['-DGALLIUM_PANFROST', '-Wno-pointer-arith'],
link_with : [libpanfrost, libpanfrostwinsys],
)
files_midgard = files(
'midgard/midgard_compile.c',
'midgard/cppwrap.cpp',
'midgard/disassemble.c',
'midgard/cmdline.c',
)
midgard_compiler = executable(
'midgard_compiler',
[files_midgard, midgard_nir_algebraic_c],
include_directories : inc_panfrost,
dependencies : [
dep_thread,
idep_nir
],
link_with : [
libgallium,
libglsl_standalone,
libmesa_util
],
build_by_default : true
)

View File

@ -0,0 +1,643 @@
"""
Copyright (C) 2018 Alyssa Rosenzweig
Copyright (c) 2013 Connor Abbott (connor@abbott.cx)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
"""
import sys
import pprint
import struct
program = []
# Definitions from cwabbott's tools
t6xx_alu_ops = {
"fadd": 0x10,
"fmul": 0x14,
"fmin": 0x28,
"fmax": 0x2C,
"fmov": 0x30,
"ffloor": 0x36,
"fceil": 0x37,
"fdot3": 0x3C,
"fdot3r": 0x3D,
"fdot4": 0x3E,
"freduce": 0x3F,
"iadd": 0x40,
"isub": 0x46,
"imul": 0x58,
"imov": 0x7B,
"feq": 0x80,
"fne": 0x81,
"flt": 0x82,
"fle": 0x83,
"f2i": 0x99,
"f2u8": 0x9C,
"u2f": 0xBC,
"ieq": 0xA0,
"ine": 0xA1,
"ilt": 0xA4,
"ile": 0xA5,
"iand": 0x70,
"ior": 0x71,
"inot": 0x72,
"iandnot": 0x74,
"ixor": 0x76,
"ball": 0xA9,
"bany": 0xB1,
"i2f": 0xB8,
"csel": 0xC5,
"fatan_pt2": 0xE8,
"frcp": 0xF0,
"frsqrt": 0xF2,
"fsqrt": 0xF3,
"fexp2": 0xF4,
"flog2": 0xF5,
"fsin": 0xF6,
"fcos": 0xF7,
"fatan2_pt1": 0xF9,
}
t6xx_alu_bits = {
"vmul": 17,
"sadd": 19,
"vadd": 21,
"smul": 23,
"lut": 25,
"br": 26,
"branch": 27,
"constants": 32
}
t6xx_alu_size_bits = {
"vmul": 48,
"sadd": 32,
"vadd": 48,
"smul": 32,
"lut": 48,
"br": 16,
"branch": 48
}
t6xx_outmod = {
"none": 0,
"pos": 1,
"int": 2,
"sat": 3
}
t6xx_reg_mode = {
"quarter": 0,
"half": 1,
"full": 2,
"double": 3
}
t6xx_dest_override = {
"lower": 0,
"upper": 1,
"none": 2
}
t6xx_load_store_ops = {
"ld_st_noop": 0x03,
"ld_attr_16": 0x95,
"ld_attr_32": 0x94,
"ld_vary_16": 0x99,
"ld_vary_32": 0x98,
"ld_uniform_16": 0xAC,
"ld_uniform_32": 0xB0,
"st_vary_16": 0xD5,
"st_vary_32": 0xD4,
"ld_color_buffer_8": 0xBA
}
t6xx_tag = {
"texture": 0x3,
"load_store": 0x5,
"alu4": 0x8,
"alu8": 0x9,
"alu12": 0xA,
"alu16": 0xB,
}
def is_tag_alu(tag):
    """Return True when *tag* lies between the alu4 and alu16 tags, inclusive."""
    return t6xx_tag["alu4"] <= tag <= t6xx_tag["alu16"]
# Just an enum
ALU = 0
LDST = 1
TEXTURE = 2
# Constant types supported, mapping the constant prefix to the `struct` format
# character used to pack it and the function that coerces the text operand
constant_types = {
"f": ("f", float),
"h": ("e", float),
"i": ("i", int),
"s": ("h", int)
}
compact_branch_op = {
"jump": 1,
"branch": 2,
"discard": 4,
"write": 7
}
branch_condition = {
"false": 1,
"true": 2,
"always": 3,
}
# TODO: What else?
texture_op = {
"normal": 0x11,
"texelfetch": 0x14
}
texture_fmt = {
"2d": 0x02,
"3d": 0x03
}
# Parse the listing named by the first command-line argument. Each line
# becomes a (mnemonic, [operands]) pair appended to `program`: the mnemonic is
# everything up to the first space, the operands are the comma-separated rest.
with open(sys.argv[1], "r") as f:
    for ln in f:
        mnemonic, _, operand_text = ln.strip().partition(" ")
        operands = [piece.strip() for piece in operand_text.split(",")]
        program += [(mnemonic, operands)]
swizzle_component = {
"x": 0,
"y": 1,
"z": 2,
"w": 3
}
def decode_reg_name(reg_name):
    """Decode a textual register name into ``(ireg, half, upper)``.

    Accepted spellings:
      * ``rN``  -- full register N: ``(N, False, False)``
      * ``hrN`` -- half register N, which lives in full register ``N >> 1``;
        ``upper`` is True when N is odd: ``(N >> 1, True, bool(N & 1))``
      * ``N``   -- bare number, used for load/store addresses

    Raises ValueError when the name is empty or does not match one of the
    spellings above. In particular a name such as ``h12`` is rejected instead
    of having its second character silently skipped.
    """
    if not reg_name:
        raise ValueError("empty register name")

    try:
        if reg_name.startswith("hr"):
            index = int(reg_name[2:])
            # Decode half-register into its full register's half
            return (index >> 1, True, bool(index & 1))

        if reg_name[0] == "r":
            return (int(reg_name[1:]), False, False)

        # Special case for load/store addresses
        return (int(reg_name), False, False)
    except ValueError:
        raise ValueError("malformed register name: %r" % (reg_name,)) from None
def standard_swizzle_from_parts(swizzle_parts):
    """Pack a swizzle into an integer, two bits per component.

    ``swizzle_parts`` is an operand already split on "."; element 1, when
    present, is the swizzle string, otherwise the identity "xyzw" is used.
    The first letter lands in the lowest two bits.
    """
    letters = "xyzw" if len(swizzle_parts) <= 1 else swizzle_parts[1]

    packed = 0
    for slot, letter in enumerate(letters):
        packed |= swizzle_component[letter] << (2 * slot)

    return packed
def mask_from_parts(mask_parts, large_mask):
    """Build a write mask from an operand split on ".".

    Element 1 of ``mask_parts``, when present, names the enabled components;
    otherwise all of "xyzw" are enabled. With ``large_mask`` each enabled
    component contributes two adjacent bits, otherwise a single bit.

    Returns ``(mask, component_string)``.
    """
    letters = "xyzw" if len(mask_parts) <= 1 else mask_parts[1]

    bits = 0
    for name in "xyzw":
        if name not in letters:
            continue

        position = swizzle_component[name]
        bits |= (3 << (2 * position)) if large_mask else (1 << position)

    return (bits, letters)
def decode_reg(reg):
    """Decode a source operand.

    Returns ``(is_immediate, value, swizzle, half, upper, modifiers)``.
    For ``#<float>`` operands ``value`` is the 16-bit pattern of the number
    as a half float and the remaining fields are 0. Otherwise ``value`` is
    the register index; ``modifiers`` has bit 1 set for a leading "-" and
    bit 0 set for an ``abs(...)`` wrapper.
    """
    if reg[0] == "#":
        # Not actually a register: reinterpret the half float's bytes as an
        # unsigned 16-bit integer
        (raw_half,) = struct.unpack("H", struct.pack("e", float(reg[1:])))
        return (True, raw_half, 0, 0, 0, 0)

    # abs() uses function call syntax; drop its closing parenthesis
    text = reg[:-1] if reg[-1] == ")" else reg
    parts = text.split(".")
    name = parts[0]

    modifiers = 0

    if name[0] == "-":
        modifiers |= 2
        name = name[1:]

    if name[0] == "a":
        modifiers |= 1
        name = name[len("abs("):]

    ireg, half, upper = decode_reg_name(name)
    swizzle = standard_swizzle_from_parts(parts)

    return (False, ireg, swizzle, half, upper, modifiers)
def decode_masked_reg(reg, large_mask):
    """Decode a destination operand of the form ``<register>[.<mask>]``.

    Returns ``(ireg, mask, component, half, upper)`` where ``component`` is
    the highest-numbered component named in the mask (0 when none is).
    """
    parts = reg.split(".")

    ireg, half, upper = decode_reg_name(parts[0])
    mask, letters = mask_from_parts(parts, large_mask)

    highest = 0
    for name in "xyzw":
        if name in letters:
            highest = max(highest, swizzle_component[name])

    return (ireg, mask, highest, half, upper)
# TODO: Fill these in XXX
# Texture pipeline registers in r28-r29
TEXTURE_BASE = 28
def decode_texture_reg_number(reg):
    """Decode a texture register name relative to TEXTURE_BASE.

    Returns ``(full, select, upper)``. Names starting with "r" are full
    registers; any other name has its first two characters skipped and is
    treated as a half register whose parity selects the upper half.
    """
    name = reg.split(".")[0]

    if name[0] == "r":
        return (True, int(name[1:]) - TEXTURE_BASE, 0)

    half_index = int(name[2:])
    return (False, (half_index >> 1) - TEXTURE_BASE, half_index & 1)
def decode_texture_reg(reg):
    """Decode a texture input register with its two-component swizzle.

    Returns ``(full, select, upper, swizzleR, swizzleL)``; note that the
    second swizzle letter comes before the first in the result.
    """
    full, select, upper = decode_texture_reg_number(reg)

    # Swizzle mandatory for texture registers, afaict
    letters = reg.split(".")[1]
    first = swizzle_component[letters[0]]
    second = swizzle_component[letters[1]]

    return (full, select, upper, second, first)
def decode_texture_out_reg(reg):
    """Decode a texture output register and its single-bit-per-component mask.

    Returns ``(full, select, upper, mask)``.
    """
    full, select, upper = decode_texture_reg_number(reg)
    mask = mask_from_parts(reg.split("."), False)[0]

    return (full, select, upper, mask)
# Translate every parsed source line into an entry of instruction_stream.
# Entry shapes produced below:
#   (LDST, word)
#   (TEXTURE, word)
#   (ALU, unit, register_word, instruction_word)   register_word is None for "br"
#   (ALU, "constants", packed_bytes)
# Lines matching none of the branches are ignored.
instruction_stream = []

for p in program:
    ins = p[0]
    arguments = p[1]

    # Mnemonics are dot-separated; the appended dots guarantee that indices
    # 1 and 2 exist (as empty strings) for short mnemonics
    family = ins_mod = ins.split(".")[0]
    ins_op = (ins + ".").split(".")[1]
    ins_outmod = (ins + "." + ".").split(".")[2]

    # An absent or unrecognised output modifier encodes as 0
    out_mod = t6xx_outmod.get(ins_outmod, 0)

    if ins in t6xx_load_store_ops:
        op = t6xx_load_store_ops[ins]
        (reg, mask, component, half, upper) = decode_masked_reg(p[1][0], False)
        (immediate, address, swizzle, half, upper, modifiers) = decode_reg(p[1][1])
        unknown = int(p[1][2], 16)
        b = (op << 0) | (reg << 8) | (mask << 13) | (swizzle << 17) | (unknown << 25) | (address << 51)
        instruction_stream += [(LDST, b)]
    elif ins_op in t6xx_alu_ops:
        op = t6xx_alu_ops[ins_op]
        (reg_out, mask, out_component, half0, upper0) = decode_masked_reg(p[1][0], True)
        (_, reg_in1, swizzle1, half1, upper1, mod1) = decode_reg(p[1][1])
        (immediate, reg_in2, swizzle2, half2, upper2, mod2) = decode_reg(p[1][2])

        if immediate:
            # The top 5 bits of the 16-bit constant travel in the src2
            # register slot; bit 15 flags the immediate
            register_word = (reg_in1 << 0) | ((reg_in2 >> 11) << 5) | (reg_out << 10) | (1 << 15)
        else:
            register_word = (reg_in1 << 0) | (reg_in2 << 5) | (reg_out << 10)

        if ins_mod in ["vadd", "vmul", "lut"]:
            io_mode = t6xx_reg_mode["half" if half0 else "full"]
            repsel = 0
            i1half = half1
            i2block = 0
            output_override = 2 # NORMAL, TODO
            wr_mask = 0

            if ins_outmod == "quarter":
                io_mode = t6xx_reg_mode["quarter"]

            if half0:
                # TODO: half actually
                repsel = 2 * upper1
            else:
                repsel = upper1

            if half0:
                # Rare case...
                (_, halfmask, _, _, _) = decode_masked_reg(p[1][0], False)
                wr_mask = halfmask
            else:
                wr_mask = mask

            if immediate:
                # Inline constant: lower 11 bits
                i2block = ((reg_in2 & 0xFF) << 3) | ((reg_in2 >> 8) & 0x7)
            else:
                if half0:
                    # TODO: replicate input 2 if half
                    pass
                else:
                    # TODO: half selection
                    i2block = upper2 | (half2 << 2)

                i2block |= swizzle2 << 3

            # Extra modifier for some special cased stuff: an optional
            # fourth dot-separated part of the mnemonic
            mnemonic_parts = ins.split(".")
            special = mnemonic_parts[3] if len(mnemonic_parts) > 3 else None

            if special == "low":
                output_override = 0 # low
            elif special == "fulllow":
                # TODO: Not really a special case, just a bug?
                io_mode = t6xx_reg_mode["full"]
                output_override = 0 # low
                wr_mask = 0xFF

            instruction_word = (op << 0) | (io_mode << 8) | (mod1 << 10) | (repsel << 12) | (i1half << 14) | (swizzle1 << 15) | (mod2 << 23) | (i2block << 25) | (output_override << 36) | (out_mod << 38) | (wr_mask << 40)
        elif ins_mod in ["sadd", "smul"]:
            # TODO: What are these?
            unknown2 = 0
            unknown3 = 0

            i1comp_block = 0

            if half1:
                i1comp_block = swizzle1 | (upper1 << 2)
            else:
                i1comp_block = swizzle1 << 1

            i2block = 0

            if immediate:
                # Inline constant is splattered in a... bizarre way. The low
                # five bits of the constant go to bits 6-10 of the field,
                # mirroring decode_scalar_imm in the disassembler, which
                # reads them back with `imm >> 6`. Masking with 15 here
                # dropped bit 4 of the constant.
                i2block = (((reg_in2 >> 9) & 3) << 0) | (((reg_in2 >> 8) & 1) << 2) | (((reg_in2 >> 5) & 7) << 3) | (((reg_in2 >> 0) & 0x1F) << 6)
            else:
                # TODO: half register
                swizzle2 = (swizzle2 << 1) & 0x1F
                i2block = (mod2 << 0) | ((not half2) << 2) | (swizzle2 << 3) | (unknown2 << 5)

            # TODO: half register
            outcomp_block = out_component << 1

            instruction_word = (op << 0) | (mod1 << 8) | ((not half1) << 10) | (i1comp_block << 11) | (i2block << 14) | (unknown3 << 25) | (out_mod << 26) | ((not half0) << 28) | (outcomp_block << 29)
        else:
            instruction_word = op

        instruction_stream += [(ALU, ins_mod, register_word, instruction_word)]
    elif family == "texture":
        # Texture ops use long series of modifiers to describe their needed
        # capabilities, separated by dots. Decode them here
        parts = ins.split(".")

        # First few modifiers are fixed, like an instruction name
        tex_op = parts[1]
        tex_fmt = parts[2]

        # The remaining are variable, but strictly ordered
        parts = parts[3:]

        op = texture_op[tex_op]

        # Some bits are defined directly in the modifier list
        shadow = "shadow" in parts
        cont = "cont" in parts
        last = "last" in parts
        has_filter = "raw" not in parts

        # The remaining need order preserved since they have their own arguments
        argument_parts = [part for part in parts if part not in ["shadow", "cont", "last", "raw"]]

        bias_lod = 0

        # Modifier arguments start at the fifth operand, in modifier order
        for argument, part in zip(argument_parts, arguments[4:]):
            if argument == "bias":
                bias_lod = int(float(part) * 256)
            else:
                print("Unknown argument: " + str(argument))

        fmt = texture_fmt[tex_fmt]
        has_offset = 0

        magic1 = 1 # IDEK
        magic2 = 2 # Where did this even come from?!

        texture_handle = int(arguments[1][len("texture"):])

        sampler_parts = arguments[2].split(".")
        sampler_handle = int(sampler_parts[0][len("sampler"):])
        swizzle0 = standard_swizzle_from_parts(sampler_parts)

        (full0, select0, upper0, mask0) = decode_texture_out_reg(arguments[0])
        (full1, select1, upper1, swizzleR1, swizzleL1) = decode_texture_reg(arguments[3])

        tex = (op << 0) | (shadow << 6) | (cont << 8) | (last << 9) | (fmt << 10) | (has_offset << 15) | (has_filter << 16) | (select1 << 17) | (upper1 << 18) | (swizzleL1 << 19) | (swizzleR1 << 21) | (0 << 23) | (magic2 << 25) | (full0 << 29) | (magic1 << 30) | (select0 << 32) | (upper0 << 33) | (mask0 << 34) | (swizzle0 << 40) | (bias_lod << 72) | (texture_handle << 88) | (sampler_handle << 104)
        instruction_stream += [(TEXTURE, tex)]
    elif family == "br":
        cond = ins.split(".")[2]
        condition = branch_condition[cond]

        bop = compact_branch_op[ins_op]

        # The operand has the form "<offset>-><destination tag>"
        offset = int(arguments[0].split("->")[0])

        # 2's complement and chill
        if offset < 0:
            offset = (1 << 7) - abs(offset)

        # Find where we're going
        dest_tag = int(arguments[0].split("->")[1])

        br = (bop << 0) | (dest_tag << 3) | (offset << 7) | (condition << 14)

        # TODO: Unconditional branch encoding

        instruction_stream += [(ALU, "br", None, br)]
    elif ins[1:] == "constants":
        # The first character of the mnemonic selects the constant type
        if ins[0] not in constant_types:
            print("Unknown constant type " + str(ins[0]))
            break

        (fmt, cast) = constant_types[ins[0]]

        encoded = [struct.pack(fmt, cast(f)) for f in p[1]]

        consts = bytearray()
        for c in encoded:
            consts += c

        # consts must be exactly one quadword (16 bytes), so pad with zeroes
        # if necessary
        consts += bytes(4*4 - len(consts))

        instruction_stream += [(ALU, "constants", consts)]
# Emit from instruction stream: pack the decoded entries into binary bundles,
# one bytearray per bundle, collected in `instructions`
instructions = []
index = 0

while index < len(instruction_stream):
    output_stream = bytearray()
    ins = instruction_stream[index]
    tag = ins[0]

    if tag == LDST:
        # A load/store bundle carries two operations. Pair this one with the
        # next entry when that is also a load/store; otherwise the first slot
        # is filled with the no-op opcode and this operation takes the second
        following = None
        if index + 1 < len(instruction_stream):
            following = instruction_stream[index + 1]

        if following and following[0] == LDST:
            parta, partb = ins[1], following[1]
            index += 1
        else:
            parta, partb = t6xx_load_store_ops["ld_st_noop"], ins[1]

        bundle = (partb << 68) | (parta << 8) | t6xx_tag["load_store"]
        output_stream += bundle.to_bytes(16, "little")
    elif tag == TEXTURE:
        bundle = (ins[1] << 8) | t6xx_tag["texture"]
        output_stream += bundle.to_bytes(16, "little")
    elif tag == ALU:
        # TODO: Combining ALU ops

        emit_size = 4 # 32-bit tag always emitted

        control = 0
        register_words = bytearray()
        body_words = bytearray()
        constant_words = None

        last_alu_bit = 0

        # Iterate through while there are ALU tags in strictly ascending order
        while (index < len(instruction_stream)
               and instruction_stream[index][0] == ALU
               and t6xx_alu_bits[instruction_stream[index][1]] > last_alu_bit):
            entry = instruction_stream[index]
            unit = entry[1]

            last_alu_bit = t6xx_alu_bits[unit]

            if unit == "constants":
                constant_words = entry[2]
            else:
                # Flag for the used part of the GPU
                control |= 1 << last_alu_bit

                # 16-bit register word, if present
                if entry[2] is not None:
                    register_words += entry[2].to_bytes(2, "little")
                    emit_size += 2

                size = t6xx_alu_size_bits[unit] // 8
                body_words += entry[3].to_bytes(size, "little")
                emit_size += size

            index += 1

        index -= 1 # fix off by one, from later loop increment

        # Pad to nearest multiple of 4 words
        remainder = emit_size & 15
        padding = (16 - remainder) if remainder else 0
        emit_size += padding

        # emit_size includes constants
        if constant_words:
            emit_size += len(constant_words)

        # Calculate tag given size
        words = emit_size >> 2
        control |= t6xx_tag["alu" + str(words)]

        # Actually emit, now that we can
        output_stream += control.to_bytes(4, "little")
        output_stream += register_words
        output_stream += body_words
        output_stream += bytes(padding)

        if constant_words:
            output_stream += constant_words

    instructions += [output_stream]
    index += 1
# Assembly over; just emit tags at this point. The upper nibble of each
# bundle's first byte receives the tag of the bundle that follows it.
binary = bytearray()
final_index = len(instructions) - 1

for idx, ins in enumerate(instructions):
    if idx == final_index:
        # Instruction stream over
        next_tag = 1
    else:
        # Instruction prefetch
        next_tag = instructions[idx + 1][0] & 0xF

        # Check for ALU special case: the following bundle is the last one
        # and is an ALU bundle
        if is_tag_alu(next_tag) and idx + 1 == final_index:
            next_tag = 1

    ins[0] |= next_tag << 4
    binary += ins

pprint.pprint(program)

with open(sys.argv[2], "wb") as f:
    f.write(binary)

View File

@ -0,0 +1,145 @@
/*
* Copyright (C) 2018 Alyssa Rosenzweig <alyssa@rosenzweig.io>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "compiler/glsl/standalone.h"
#include "compiler/glsl/glsl_to_nir.h"
#include "compiler/nir_types.h"
#include "midgard_compile.h"
#include "disassemble.h"
#include "util/u_dynarray.h"
#include "main/mtypes.h"
bool c_do_mat_op_to_vec(struct exec_list *instructions);
/**
 * Write the bytes held in `data` to `filename`, then release the array.
 *
 * A failure to open or fully write the file is reported on stderr instead of
 * passing a NULL stream to fwrite()/fclose(). The array is finalised on every
 * path, so the caller must not use it afterwards.
 */
static void
finalise_to_disk(const char *filename, struct util_dynarray *data)
{
        FILE *fp = fopen(filename, "wb");

        if (fp == NULL) {
                fprintf(stderr, "Unable to open %s for writing\n", filename);
        } else {
                if (fwrite(data->data, 1, data->size, fp) != data->size)
                        fprintf(stderr, "Short write to %s\n", filename);

                fclose(fp);
        }

        util_dynarray_fini(data);
}
/**
 * Compile and link the two GLSL sources named in argv[0] and argv[1], then
 * write the Midgard binaries for the vertex and fragment stages to
 * "vertex.bin" and "fragment.bin" in the current directory.
 */
static void
compile_shader(char **argv)
{
        struct standalone_options options = {
                .glsl_version = 140,
                .do_link = true,
        };

        struct gl_shader_program *prog =
                standalone_compile_shader(&options, 2, argv);

        prog->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program->info.stage = MESA_SHADER_FRAGMENT;

        /* Run the matrix-op-to-vector pass over every stage that linked */
        for (unsigned stage = 0; stage < MESA_SHADER_STAGES; ++stage) {
                if (prog->_LinkedShaders[stage] != NULL)
                        c_do_mat_op_to_vec(prog->_LinkedShaders[stage]->ir);
        }

        midgard_program compiled;
        nir_shader *nir;

        nir = glsl_to_nir(prog, MESA_SHADER_VERTEX, &midgard_nir_options);
        midgard_compile_shader_nir(nir, &compiled, false);
        finalise_to_disk("vertex.bin", &compiled.compiled);

        nir = glsl_to_nir(prog, MESA_SHADER_FRAGMENT, &midgard_nir_options);
        midgard_compile_shader_nir(nir, &compiled, false);
        finalise_to_disk("fragment.bin", &compiled.compiled);
}
/**
 * Compile the single GLSL source named in argv[0] (no link step requested)
 * as a blend shader and write the result to "blend.bin" in the current
 * directory.
 */
static void
compile_blend(char **argv)
{
        struct standalone_options options = {
                .glsl_version = 140,
        };

        struct gl_shader_program *prog =
                standalone_compile_shader(&options, 1, argv);

        prog->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program->info.stage = MESA_SHADER_FRAGMENT;

#if 0
        for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i) {
                if (prog->_LinkedShaders[i] == NULL)
                        continue;

                c_do_mat_op_to_vec(prog->_LinkedShaders[i]->ir);
        }
#endif

        midgard_program program;
        nir_shader *nir =
                glsl_to_nir(prog, MESA_SHADER_FRAGMENT, &midgard_nir_options);

        /* The final `true` is what distinguishes this call from the ones in
         * compile_shader */
        midgard_compile_shader_nir(nir, &program, true);
        finalise_to_disk("blend.bin", &program.compiled);
}
/**
 * Read the whole of `filename` into memory and print its disassembly.
 *
 * Every I/O and allocation step is checked explicitly; on failure a message
 * is printed to stderr and the process exits with status 1. The checks do
 * not rely on assert(), which is compiled out under NDEBUG.
 */
static void
disassemble(const char *filename)
{
        FILE *fp = fopen(filename, "rb");

        if (fp == NULL) {
                fprintf(stderr, "Unable to open %s\n", filename);
                exit(1);
        }

        /* Determine the file size by seeking to the end */
        long filesize = -1;

        if (fseek(fp, 0, SEEK_END) == 0)
                filesize = ftell(fp);

        if (filesize < 0) {
                fprintf(stderr, "Unable to determine the size of %s\n", filename);
                fclose(fp);
                exit(1);
        }

        rewind(fp);

        /* Request at least one byte so that an empty file does not make a
         * NULL return from malloc ambiguous */
        unsigned char *code = malloc(filesize > 0 ? (size_t) filesize : 1);

        if (code == NULL) {
                fprintf(stderr, "Out of memory reading %s\n", filename);
                fclose(fp);
                exit(1);
        }

        size_t nread = fread(code, 1, (size_t) filesize, fp);
        fclose(fp);

        if (nread != (size_t) filesize) {
                fprintf(stderr, "Short read from %s\n", filename);
                free(code);
                exit(1);
        }

        disassemble_midgard(code, filesize);
        free(code);
}
/* Print the command summary to stderr and exit with a failure status. */
static void
usage(void)
{
        fprintf(stderr, "Usage: midgard_compiler command [args]\n");
        fprintf(stderr, "midgard_compiler compile program.vert program.frag\n");
        fprintf(stderr, "midgard_compiler blend program.blend\n");
        fprintf(stderr, "midgard_compiler disasm binary.bin\n");
        exit(1);
}

/**
 * Entry point: dispatch on the command in argv[1].
 *
 * Each command's operand count is validated before argv is indexed, so a
 * missing operand prints the usage text instead of passing NULL on to
 * the compiler or to fopen().
 */
int
main(int argc, char **argv)
{
        if (argc < 2)
                usage();

        const char *command = argv[1];
        int num_operands = argc - 2;

        if (strcmp(command, "compile") == 0) {
                /* Needs a vertex and a fragment source */
                if (num_operands < 2)
                        usage();

                compile_shader(&argv[2]);
        } else if (strcmp(command, "blend") == 0) {
                if (num_operands < 1)
                        usage();

                compile_blend(&argv[2]);
        } else if (strcmp(command, "disasm") == 0) {
                if (num_operands < 1)
                        usage();

                disassemble(argv[2]);
        } else {
                fprintf(stderr, "Unknown command\n");
                exit(1);
        }

        return 0;
}

View File

@ -0,0 +1,9 @@
struct exec_list;

bool do_mat_op_to_vec(struct exec_list *instructions);

/* Shim with C linkage that forwards to do_mat_op_to_vec, which is declared
 * above with C++ linkage, so that C sources can call it. */
extern "C" bool
c_do_mat_op_to_vec(struct exec_list *instructions)
{
        return do_mat_op_to_vec(instructions);
}

View File

@ -0,0 +1,986 @@
/* Author(s):
* Connor Abbott
* Alyssa Rosenzweig
*
* Copyright (c) 2013 Connor Abbott (connor@abbott.cx)
* Copyright (c) 2018 Alyssa Rosenzweig (alyssa@rosenzweig.io)
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <stdio.h>
#include <stdint.h>
#include <assert.h>
#include <inttypes.h>
#include <string.h>
#include "midgard.h"
#include "midgard-parse.h"
#include "disassemble.h"
#include "util/half_float.h"
#define DEFINE_CASE(define, str) case define: { printf(str); break; }
static bool is_instruction_int = false;
/* Print the mnemonic for an ALU opcode, or "alu_op_XX" when the name table
 * has no entry for it. Also records in is_instruction_int whether the
 * mnemonic starts with 'i', which later printing consults. */
static void
print_alu_opcode(midgard_alu_op op)
{
        const char *name = alu_opcode_names[op];

        if (name)
                printf("%s", name);
        else
                printf("alu_op_%02X", op);

        /* For constant analysis */
        is_instruction_int = name && name[0] == 'i';
}
/* Print the mnemonic for a load/store opcode, or "ldst_op_XX" when the name
 * table has no entry for it. */
static void
print_ld_st_opcode(midgard_load_store_op op)
{
        const char *name = load_store_opcode_names[op];

        if (!name) {
                printf("ldst_op_%02X", op);
                return;
        }

        printf("%s", name);
}
static bool is_embedded_constant_half = false;
static bool is_embedded_constant_int = false;
/* Print a register as "rN", or "hrN" for a half register. As a side effect,
 * a reference to register 26 (or to either of its halves) updates the
 * embedded-constant flags consulted when the constants are printed. */
static void
print_reg(unsigned reg, bool half)
{
        /* Perform basic static analysis for expanding constants correctly */
        unsigned full_reg = half ? (reg >> 1) : reg;

        if (full_reg == 26) {
                if (half)
                        is_embedded_constant_half = true;

                is_embedded_constant_int = is_instruction_int;
        }

        if (half)
                printf("hr%u", reg);
        else
                printf("r%u", reg);
}
/* Suffix printed for each output modifier value; values 0 and 2 print
 * nothing. The table and its strings are never modified, hence const. */
static const char * const outmod_names[4] = {
        "",
        ".pos",
        "",
        ".sat"
};

/* Print the suffix for an output modifier. The index is masked to the table
 * size so an unexpected value cannot read past the end of outmod_names. */
static void
print_outmod(midgard_outmod outmod)
{
        printf("%s", outmod_names[(unsigned) outmod & 3]);
}
/* Print four 32-bit words as comma-separated hex values followed by a
 * newline. `tabs` is accepted but not used. */
static void
print_quad_word(uint32_t *words, unsigned tabs)
{
        for (unsigned i = 0; i < 4; i++) {
                const char *separator = (i == 3) ? "" : ",";

                printf("0x%08X%s ", words[i], separator);
        }

        printf("\n");
}
/* Print one source operand of a vector ALU instruction.
 *
 * src_binary: raw source bits, reinterpreted as midgard_vector_alu_src
 * out_high:   whether the destination is the upper half register
 * out_half:   whether the operation is treated as half-width; selects which
 *             of the two register-printing paths below is taken
 * reg:        source register index taken from the register word
 *
 * Output order: "-" if negated, "abs(" if absolute, the register, the
 * swizzle unless it is the identity (0xE4), then ")" to close abs. Flag
 * combinations that are not decoded are emitted as comments in the output. */
static void
print_vector_src(unsigned src_binary, bool out_high,
bool out_half, unsigned reg)
{
midgard_vector_alu_src *src = (midgard_vector_alu_src *)&src_binary;
if (src->negate)
printf("-");
if (src->abs)
printf("abs(");
// register
if (out_half) {
if (src->half)
printf(" /* half */ ");
unsigned half_reg;
/* Choose which half of `reg` is read: by default the same half as the
 * output; rep_low (for a high output) or rep_high (for a low output)
 * selects the opposite half */
if (out_high) {
if (src->rep_low)
half_reg = reg * 2;
else
half_reg = reg * 2 + 1;
if (src->rep_high)
printf(" /* rep_high */ ");
} else {
if (src->rep_high)
half_reg = reg * 2 + 1;
else
half_reg = reg * 2;
if (src->rep_low)
printf(" /* rep_low */ ");
}
print_reg(half_reg, true);
} else {
if (src->rep_high)
printf(" /* rep_high */ ");
/* A half source here is printed as a half register; rep_low is added to
 * the doubled register index to pick which one */
if (src->half)
print_reg(reg * 2 + src->rep_low, true);
else {
if (src->rep_low)
printf(" /* rep_low */ ");
print_reg(reg, false);
}
}
// swizzle: two bits per component, component 0 in the lowest bits
if (src->swizzle != 0xE4) { //default swizzle
unsigned i;
static const char c[4] = "xyzw";
printf(".");
for (i = 0; i < 4; i++)
printf("%c", c[(src->swizzle >> (i * 2)) & 3]);
}
if (src->abs)
printf(")");
}
/* Reassemble the 16-bit inline constant of a vector ALU instruction.
 * Bits 15-11 come from the src2 register slot, bits 10-8 from the low three
 * bits of `imm` and bits 7-0 from imm bits 10-3. */
static uint16_t
decode_vector_imm(unsigned src2_reg, unsigned imm)
{
        unsigned top = src2_reg << 11;
        unsigned middle = (imm & 0x7) << 8;
        unsigned bottom = (imm >> 3) & 0xFF;

        return (uint16_t) (top | middle | bottom);
}
/* Print an inline constant after a '#': as a decimal integer when the
 * current instruction was flagged as an integer op, otherwise converted
 * from half float and printed with %g. */
static void
print_immediate(uint16_t imm)
{
        if (!is_instruction_int) {
                printf("#%g", _mesa_half_to_float(imm));
                return;
        }

        printf("#%d", imm);
}
/* Disassemble and print one vector ALU field.
 *
 * name:     unit name printed as a prefix (callers pass "vmul", "vadd", "lut")
 * words:    the instruction body, reinterpreted as midgard_vector_alu
 * reg_word: the 16-bit register word, reinterpreted as midgard_reg_info
 * tabs:     accepted but not used
 *
 * Prints "<name>.<op><outmod> <dest>[.mask], <src1>, <src2 or #imm>\n". */
static void
print_vector_field(const char *name, uint16_t *words, uint16_t reg_word,
unsigned tabs)
{
midgard_reg_info *reg_info = (midgard_reg_info *)&reg_word;
midgard_vector_alu *alu_field = (midgard_vector_alu *) words;
/* Only the half and full register modes are decoded below; anything else
 * is reported and then decoded along the non-half path */
if (alu_field->reg_mode != midgard_reg_mode_half &&
alu_field->reg_mode != midgard_reg_mode_full) {
printf("unknown reg mode %u\n", alu_field->reg_mode);
}
/* For now, prefix instruction names with their unit, until we
 * understand how this works on a deeper level */
printf("%s.", name);
print_alu_opcode(alu_field->op);
print_outmod(alu_field->outmod);
printf(" ");
bool half, out_half, out_high = false;
unsigned mask;
half = (alu_field->reg_mode == midgard_reg_mode_half);
if (half) {
/* In half mode the low nibble of the mask is used when any of its bits
 * is set (low half register); otherwise the high nibble is used and the
 * output is the high half register */
if (alu_field->mask & 0xF) {
out_high = false;
/* Both nibbles populated: dump the raw mask as a comment */
if ((alu_field->mask & 0xF0))
printf("/* %X */ ", alu_field->mask);
mask = alu_field->mask;
} else {
out_high = true;
mask = alu_field->mask >> 4;
}
} else {
/* Otherwise collapse the 8-bit mask to 4 bits by taking every other bit
 * (bits 0, 2, 4 and 6) */
mask = alu_field->mask & 1;
mask |= (alu_field->mask & 4) >> 1;
mask |= (alu_field->mask & 16) >> 2;
mask |= (alu_field->mask & 64) >> 3;
}
out_half = half;
/* A destination override forces a half-register destination and selects
 * which half is written */
if (alu_field->dest_override != midgard_dest_override_none) {
if (out_half)
printf("/* half */ ");
out_half = true;
if (alu_field->dest_override == midgard_dest_override_lower)
out_high = false;
else if (alu_field->dest_override == midgard_dest_override_upper)
out_high = true;
else
assert(0);
}
/* Destination register */
if (out_half) {
if (out_high)
print_reg(2 * reg_info->out_reg + 1, true);
else
print_reg(2 * reg_info->out_reg, true);
} else
print_reg(reg_info->out_reg, false);
/* Write mask, omitted when the low four bits are all set. In the half
 * path with a populated low nibble `mask` still holds all eight bits, so
 * a populated high nibble also makes this comparison fail; only bits 0-3
 * are printed either way */
if (mask != 0xF) {
unsigned i;
static const char c[4] = "xyzw";
printf(".");
for (i = 0; i < 4; i++)
if (mask & (1 << i))
printf("%c", c[i]);
}
printf(", ");
/* Sources are printed relative to the instruction's register mode (`half`),
 * not to the possibly overridden destination width */
print_vector_src(alu_field->src1, out_high, half, reg_info->src1_reg);
printf(", ");
if (reg_info->src2_imm) {
/* The low two bits of src2 are dropped before decoding the constant */
uint16_t imm = decode_vector_imm(reg_info->src2_reg, alu_field->src2 >> 2);
print_immediate(imm);
} else {
print_vector_src(alu_field->src2, out_high, half,
reg_info->src2_reg);
}
printf("\n");
}
/* Print one source operand of a scalar ALU instruction.
 *
 * src_binary: raw source bits, reinterpreted as midgard_scalar_alu_src
 * reg:        source register index taken from the register word
 *
 * A full source prints the register itself and selects the component with
 * `component >> 1`. A half source prints half register
 * `reg * 2 + (component >> 2)` and selects the component with
 * `component & 3`. */
static void
print_scalar_src(unsigned src_binary, unsigned reg)
{
        midgard_scalar_alu_src *src = (midgard_scalar_alu_src *)&src_binary;

        if (src->negate)
                printf("-");

        if (src->abs)
                printf("abs(");

        if (src->full)
                print_reg(reg, false);
        else
                print_reg(reg * 2 + (src->component >> 2), true);

        /* The original had a lone line-continuation backslash between this
         * declaration and the printf; it has been removed */
        static const char c[4] = "xyzw";
        printf(".%c", c[src->full ? src->component >> 1 : src->component & 3]);

        if (src->abs)
                printf(")");
}
/* Reassemble the 16-bit inline constant of a scalar ALU instruction.
 * Bits 15-11 come from the src2 register slot; the rest are gathered from
 * `imm`: bits 1-0 -> 10-9, bit 2 -> 8, bits 5-3 -> 7-5, bits 6 and up -> 0
 * and up. */
static uint16_t
decode_scalar_imm(unsigned src2_reg, unsigned imm)
{
        unsigned bits = src2_reg << 11;

        bits |= (imm & 3) << 9;
        bits |= (imm & 4) << 6;
        bits |= (imm & 0x38) << 2;
        bits |= imm >> 6;

        return (uint16_t) bits;
}
/* Disassemble and print one scalar ALU field.
 *
 * name:     unit name printed as a prefix (callers pass "sadd" or "smul")
 * words:    the instruction body, reinterpreted as midgard_scalar_alu
 * reg_word: the 16-bit register word, reinterpreted as midgard_reg_info
 * tabs:     accepted but not used
 *
 * Prints "<name>.<op><outmod> <dest>.<c>, <src1>, <src2 or #imm>\n". */
static void
print_scalar_field(const char *name, uint16_t *words, uint16_t reg_word,
                   unsigned tabs)
{
        static const char components[4] = "xyzw";

        midgard_reg_info *reg_info = (midgard_reg_info *)&reg_word;
        midgard_scalar_alu *alu_field = (midgard_scalar_alu *) words;

        if (alu_field->unknown)
                printf("scalar ALU unknown bit set\n");

        printf("%s.", name);
        print_alu_opcode(alu_field->op);
        print_outmod(alu_field->outmod);
        printf(" ");

        /* Destination: a full register takes its component from
         * output_component >> 1; a half register uses bit 2 to pick the half
         * and the low two bits for the component */
        unsigned out_component = alu_field->output_component;
        unsigned out_index;

        if (alu_field->output_full) {
                print_reg(reg_info->out_reg, false);
                out_index = out_component >> 1;
        } else {
                print_reg(reg_info->out_reg * 2 + (out_component >> 2), true);
                out_index = out_component & 3;
        }

        printf(".%c, ", components[out_index]);

        print_scalar_src(alu_field->src1, reg_info->src1_reg);
        printf(", ");

        if (reg_info->src2_imm) {
                uint16_t imm = decode_scalar_imm(reg_info->src2_reg,
                                                 alu_field->src2);
                print_immediate(imm);
        } else {
                print_scalar_src(alu_field->src2, reg_info->src2_reg);
        }

        printf("\n");
}
/* Print the branch operation followed by a dot; operations without a known
 * name print as "unk<N>.". */
static void
print_branch_op(int op)
{
        const char *label = NULL;

        switch (op) {
        case midgard_jmp_writeout_op_branch_cond:
                label = "cond.";
                break;

        case midgard_jmp_writeout_op_writeout:
                label = "write.";
                break;

        case midgard_jmp_writeout_op_discard:
                label = "discard.";
                break;

        default:
                break;
        }

        if (label)
                printf("%s", label);
        else
                printf("unk%d.", op);
}
/* Print the name of a branch condition; values without a known name print
 * nothing. */
static void
print_branch_cond(int cond)
{
        const char *label = "";

        switch (cond) {
        case midgard_condition_write0:
                label = "write0";
                break;

        case midgard_condition_false:
                label = "false";
                break;

        case midgard_condition_true:
                label = "true";
                break;

        case midgard_condition_always:
                label = "always";
                break;

        default:
                break;
        }

        printf("%s", label);
}
/* Disassemble a 16-bit compact branch/writeout word. The low three bits
 * select the operation: the unconditional branch has its own layout
 * (midgard_branch_uncond); every other operation is decoded with the
 * conditional layout (midgard_branch_cond). */
static void
print_compact_branch_writeout_field(uint16_t word)
{
        midgard_jmp_writeout_op op = word & 0x7;

        if (op == midgard_jmp_writeout_op_branch_uncond) {
                midgard_branch_uncond br_uncond;
                memcpy((char *) &br_uncond, (char *) &word, sizeof(br_uncond));

                printf("br.uncond ");

                if (br_uncond.unknown != 1)
                        printf("unknown:%d, ", br_uncond.unknown);

                /* Non-negative offsets get an explicit sign */
                if (br_uncond.offset >= 0)
                        printf("+");

                printf("%d", br_uncond.offset);
                printf(" -> %X\n", br_uncond.dest_tag);
        } else {
                midgard_branch_cond br_cond;
                memcpy((char *) &br_cond, (char *) &word, sizeof(br_cond));

                printf("br.");
                print_branch_op(br_cond.op);
                print_branch_cond(br_cond.cond);
                printf(" ");

                if (br_cond.offset >= 0)
                        printf("+");

                printf("%d", br_cond.offset);
                printf(" -> %X\n", br_cond.dest_tag);
        }
}
/* Disassemble an extended branch/writeout field: copy the bytes at `words`
 * into a midgard_branch_extended and print its operation, condition, any
 * non-zero unknown/zero fields, the signed offset and the destination tag. */
static void
print_extended_branch_writeout_field(uint8_t *words)
{
        midgard_branch_extended br;
        memcpy((char *) &br, (char *) words, sizeof(br));

        printf("br.");
        print_branch_op(br.op);
        print_branch_cond(br.cond);

        /* XXX: This can't be right */
        if (br.unknown)
                printf(".unknown%d\n", br.unknown);

        if (br.zero)
                printf(".zero%d\n", br.zero);

        printf(" ");

        /* Non-negative offsets get an explicit sign */
        if (br.offset >= 0)
                printf("+");

        printf("%d", br.offset);
        printf(" -> %X\n", br.dest_tag);
}
/* Count how many of the control word's bits 17, 19, 21, 23 and 25 are set. */
static unsigned
num_alu_fields_enabled(uint32_t control_word)
{
        unsigned count = 0;

        for (unsigned bit = 17; bit <= 25; bit += 2)
                count += (control_word >> bit) & 1;

        return count;
}
/* Reinterpret the bits of a 32-bit integer as a float, without any numeric
 * conversion. */
static float
float_bitcast(uint32_t integer)
{
        float value;

        memcpy(&value, &integer, sizeof(value));

        return value;
}
/* Disassemble one ALU bundle.
 *
 * words:          start of the bundle; words[0] is the 32-bit control word
 * num_quad_words: bundle length in 128-bit quadwords
 * tabs:           forwarded to the field printers, which do not use it
 *
 * After the control word comes one 16-bit register word per enabled ALU
 * field (control bits 17, 19, 21, 23, 25), followed by the instruction
 * bodies in the same order, then the optional compact (bit 26) and extended
 * (bit 27) branch fields. num_words tracks the size consumed so far in
 * 16-bit units so that a trailing quadword of constants can be detected. */
static void
print_alu_word(uint32_t *words, unsigned num_quad_words,
unsigned tabs)
{
uint32_t control_word = words[0];
/* beginning_ptr walks the register words; word_ptr walks the bodies, which
 * start right after the last register word */
uint16_t *beginning_ptr = (uint16_t *)(words + 1);
unsigned num_fields = num_alu_fields_enabled(control_word);
uint16_t *word_ptr = beginning_ptr + num_fields;
/* Two 16-bit units for the control word plus one per register word */
unsigned num_words = 2 + num_fields;
if ((control_word >> 16) & 1)
printf("unknown bit 16 enabled\n");
/* Vector fields occupy three 16-bit units, scalar fields two */
if ((control_word >> 17) & 1) {
print_vector_field("vmul", word_ptr, *beginning_ptr, tabs);
beginning_ptr += 1;
word_ptr += 3;
num_words += 3;
}
if ((control_word >> 18) & 1)
printf("unknown bit 18 enabled\n");
if ((control_word >> 19) & 1) {
print_scalar_field("sadd", word_ptr, *beginning_ptr, tabs);
beginning_ptr += 1;
word_ptr += 2;
num_words += 2;
}
if ((control_word >> 20) & 1)
printf("unknown bit 20 enabled\n");
if ((control_word >> 21) & 1) {
print_vector_field("vadd", word_ptr, *beginning_ptr, tabs);
beginning_ptr += 1;
word_ptr += 3;
num_words += 3;
}
if ((control_word >> 22) & 1)
printf("unknown bit 22 enabled\n");
if ((control_word >> 23) & 1) {
print_scalar_field("smul", word_ptr, *beginning_ptr, tabs);
beginning_ptr += 1;
word_ptr += 2;
num_words += 2;
}
if ((control_word >> 24) & 1)
printf("unknown bit 24 enabled\n");
if ((control_word >> 25) & 1) {
print_vector_field("lut", word_ptr, *beginning_ptr, tabs);
beginning_ptr += 1;
word_ptr += 3;
num_words += 3;
}
/* Branch fields have no register word: one 16-bit unit for the compact
 * form, three for the extended form */
if ((control_word >> 26) & 1) {
print_compact_branch_writeout_field(*word_ptr);
word_ptr += 1;
num_words += 1;
}
if ((control_word >> 27) & 1) {
print_extended_branch_writeout_field((uint8_t *) word_ptr);
word_ptr += 3;
num_words += 3;
}
/* If the bundle is longer than the fields need (rounded up to whole
 * quadwords of eight 16-bit units), exactly one extra quadword is expected */
if (num_quad_words > (num_words + 7) / 8) {
assert(num_quad_words == (num_words + 15) / 8);
//Assume that the extra quadword is constants
void *consts = words + (4 * num_quad_words - 4);
/* The interpretation follows the flags that print_reg set while the
 * fields above were printed. Four values are printed in every case. */
if (is_embedded_constant_int) {
if (is_embedded_constant_half) {
int16_t *sconsts = (int16_t *) consts;
printf("sconstants %d, %d, %d, %d\n",
sconsts[0],
sconsts[1],
sconsts[2],
sconsts[3]);
} else {
int32_t *iconsts = (int32_t *) consts;
printf("iconstants %d, %d, %d, %d\n",
iconsts[0],
iconsts[1],
iconsts[2],
iconsts[3]);
}
} else {
if (is_embedded_constant_half) {
uint16_t *hconsts = (uint16_t *) consts;
printf("hconstants %g, %g, %g, %g\n",
_mesa_half_to_float(hconsts[0]),
_mesa_half_to_float(hconsts[1]),
_mesa_half_to_float(hconsts[2]),
_mesa_half_to_float(hconsts[3]));
} else {
uint32_t *fconsts = (uint32_t *) consts;
printf("fconstants %g, %g, %g, %g\n",
float_bitcast(fconsts[0]),
float_bitcast(fconsts[1]),
float_bitcast(fconsts[2]),
float_bitcast(fconsts[3]));
}
}
}
}
/* Swizzle/mask formats are common between load/store ops and texture ops, it
* looks like... */
/* Print a four-lane swizzle as ".abcd", where each letter is one of x/y/z/w
 * selected by two bits of `swizzle` (lane 0 in the lowest bits). The value
 * 0xE4 selects x, y, z, w in order and prints nothing. */
static void
print_swizzle(uint32_t swizzle)
{
        static const char component_names[] = "xyzw";
        char text[6];
        unsigned lane;

        if (swizzle == 0xE4)
                return;

        /* Assemble the whole suffix, then emit it in one go */
        text[0] = '.';

        for (lane = 0; lane < 4; lane++)
                text[1 + lane] = component_names[(swizzle >> (2 * lane)) & 3];

        text[5] = '\0';

        printf("%s", text);
}
/* Print a write mask as "." followed by the letter of every enabled
 * component (bits 0..3 map to x, y, z, w). A mask of 0xF prints nothing; an
 * all-zero mask prints ".0". */
static void
print_mask(uint32_t mask)
{
        static const char component_names[] = "xyzw";
        char text[6];
        unsigned length = 0;
        unsigned bit;

        if (mask == 0xF)
                return;

        text[length++] = '.';

        for (bit = 0; bit < 4; bit++) {
                if (mask & (1 << bit))
                        text[length++] = component_names[bit];
        }

        /* Degenerate case: no component enabled at all */
        if (mask == 0)
                text[length++] = '0';

        text[length] = '\0';

        printf("%s", text);
}
/* Print the varying qualifiers stored in the varying_parameters field of a
 * load/store word, plus diagnostics when fields expected to be zero are not. */
static void
print_varying_parameters(midgard_load_store_word *word)
{
        midgard_varying_parameter param;
        unsigned raw = word->varying_parameters;

        /* Reinterpret the raw field through the packed parameter layout */
        memcpy(&param, &raw, sizeof(param));

        if (!param.is_varying) {
                /* Qualifiers without the varying bit are unexpected */
                if (param.flat || param.interpolation)
                        printf(" /* is_varying not set but varying metadata attached */");
        } else {
                if (param.flat)
                        printf(".flat");

                /* The default interpolation mode prints nothing */
                switch (param.interpolation) {
                case midgard_interp_default:
                        break;

                case midgard_interp_centroid:
                        printf(".centroid");
                        break;

                default:
                        printf(".interp%d", param.interpolation);
                        break;
                }
        }

        if (param.zero1 || param.zero2)
                printf(" /* zero tripped, %d %d */ ", param.zero1, param.zero2);
}
/* Return true if `op` is one of the varying load or store opcodes. */
static bool
is_op_varying(unsigned op)
{
        bool is_varying_store = (op == midgard_op_store_vary_16) ||
                                (op == midgard_op_store_vary_32);

        bool is_varying_load = (op == midgard_op_load_vary_16) ||
                               (op == midgard_op_load_vary_32);

        return is_varying_store || is_varying_load;
}
/* Print a single load/store instruction, given as the raw bits of one
 * midgard_load_store_word. Output form: opcode, optional varying qualifiers,
 * destination register with mask, address, swizzle, and the unknown field in
 * hex. `tabs` is accepted but not used. */
static void
print_load_store_instr(uint64_t data,
                       unsigned tabs)
{
        midgard_load_store_word *ldst = (midgard_load_store_word *) &data;
        int address;

        print_ld_st_opcode(ldst->op);

        if (is_op_varying(ldst->op))
                print_varying_parameters(ldst);

        printf(" r%d", ldst->reg);
        print_mask(ldst->mask);

        if (ldst->op != midgard_op_load_uniform_32) {
                address = ldst->address;
        } else {
                /* Uniforms use their own addressing scheme: the address
                 * field supplies the high part and the top three bits of
                 * varying_parameters the low part */
                /* TODO: Combine fields logically */
                int low_bits = ldst->varying_parameters >> 7;
                int high_bits = ldst->address;

                address = (high_bits << 3) | low_bits;
        }

        printf(", %d", address);
        print_swizzle(ldst->swizzle);
        printf(", 0x%X\n", ldst->unknown);
}
/* Print the two instruction slots of a load/store bundle, in order. A slot
 * whose raw value is 3 is skipped and prints nothing. */
static void
print_load_store_word(uint32_t *word, unsigned tabs)
{
        midgard_load_store *bundle = (midgard_load_store *) word;
        const uint64_t slots[2] = { bundle->word1, bundle->word2 };
        unsigned slot;

        for (slot = 0; slot < 2; slot++) {
                if (slots[slot] == 3)
                        continue;

                print_load_store_instr(slots[slot], tabs);
        }
}
/* Print a texture pipeline register. Full registers are named
 * r(REG_TEX_BASE + select); half registers are named
 * hr((REG_TEX_BASE + select) * 2 + upper). `upper` only makes sense for half
 * registers, so an error note is printed if it is combined with `full`. */
static void
print_texture_reg(bool full, bool select, bool upper)
{
        int full_index = REG_TEX_BASE + select;

        if (!full) {
                printf("hr%d", full_index * 2 + upper);
                return;
        }

        printf("r%d", full_index);

        if (upper)
                printf("// error: out full / upper mutually exclusive\n");
}
/* Print the texture format as a "."-prefixed modifier. Formats without a
 * DEFINE_CASE entry are printed numerically as "fmt_<n>".
 *
 * NOTE(review): DEFINE_CASE is defined earlier in the file (not visible
 * here); presumably it expands to a case label that prints the given name and
 * breaks -- confirm. TEXTURE_CUBE has no entry, so it takes the numeric path. */
static void
print_texture_format(int format)
{
/* Act like a modifier */
printf(".");
switch (format) {
DEFINE_CASE(TEXTURE_2D, "2d");
DEFINE_CASE(TEXTURE_3D, "3d");
default:
printf("fmt_%d", format);
break;
}
}
/* Print the texture operation as a "."-prefixed modifier. Operations without
 * a DEFINE_CASE entry are printed numerically as "op_<n>".
 *
 * Despite its name, the `format` parameter carries the texture op (the caller
 * passes texture->op).
 *
 * NOTE(review): DEFINE_CASE is defined earlier in the file (not visible
 * here); presumably it expands to a case label that prints the given name and
 * breaks -- confirm. */
static void
print_texture_op(int format)
{
/* Act like a modifier */
printf(".");
switch (format) {
DEFINE_CASE(TEXTURE_OP_NORMAL, "normal");
DEFINE_CASE(TEXTURE_OP_TEXEL_FETCH, "texelfetch");
default:
printf("op_%d", format);
break;
}
}
#undef DEFINE_CASE
/* Disassemble one texture instruction and print it to stdout.
 *
 * word: start of the instruction, interpreted as a midgard_texture_word
 * tabs: accepted but not used
 *
 * Output form: "texture" with its modifiers, the output register and mask,
 * the texture and sampler handles, the swizzle, the input register with a
 * two-component swizzle, then the optional offset register and bias. Fields
 * whose meaning is unknown are dumped as "//" comment lines when nonzero. */
static void
print_texture_word(uint32_t *word, unsigned tabs)
{
midgard_texture_word *texture = (midgard_texture_word *) word;
/* Instruction family, like ALU words have theirs */
printf("texture");
/* Broad category of texture operation in question */
print_texture_op(texture->op);
/* Specific format in question */
print_texture_format(texture->format);
/* Instruction "modifiers" parallel the ALU instructions. First group
 * are modifiers that act alone */
/* Note the inversion: ".raw" is printed when the filter bit is clear */
if (!texture->filter)
printf(".raw");
if (texture->shadow)
printf(".shadow");
if (texture->cont)
printf(".cont");
if (texture->last)
printf(".last");
/* Second set are modifiers which take an extra argument each */
if (texture->has_offset)
printf(".offset");
if (texture->bias)
printf(".bias");
printf(" ");
print_texture_reg(texture->out_full, texture->out_reg_select, texture->out_upper);
print_mask(texture->mask);
printf(", ");
printf("texture%d, ", texture->texture_handle);
printf("sampler%d", texture->sampler_handle);
print_swizzle(texture->swizzle);
printf(", ");
/* The input register is always printed as a full register; the struct
 * has no in_reg_full field, hence the hard-coded true */
print_texture_reg(/*texture->in_reg_full*/true, texture->in_reg_select, texture->in_reg_upper);
printf(".%c%c, ", "xyzw"[texture->in_reg_swizzle_left],
"xyzw"[texture->in_reg_swizzle_right]);
/* TODO: can offsets be full words? */
if (texture->has_offset) {
print_texture_reg(false, texture->offset_reg_select, texture->offset_reg_upper);
printf(", ");
}
/* The 8-bit bias field is scaled down by 256 for printing */
if (texture->bias)
printf("%f, ", texture->bias / 256.0f);
printf("\n");
/* While not zero in general, for these simple instructions the
 * following unknowns are zero, so we don't include them */
if (texture->unknown1 ||
texture->unknown2 ||
texture->unknown3 ||
texture->unknown4 ||
texture->unknownA ||
texture->unknownB ||
texture->unknown8 ||
texture->unknown9) {
printf("// unknown1 = 0x%x\n", texture->unknown1);
printf("// unknown2 = 0x%x\n", texture->unknown2);
printf("// unknown3 = 0x%x\n", texture->unknown3);
printf("// unknown4 = 0x%x\n", texture->unknown4);
printf("// unknownA = 0x%x\n", texture->unknownA);
printf("// unknownB = 0x%x\n", texture->unknownB);
printf("// unknown8 = 0x%x\n", texture->unknown8);
printf("// unknown9 = 0x%x\n", texture->unknown9);
}
/* Similarly, if no offset is applied, these are zero. If an offset
 * -is- applied, or gradients are used, etc, these are nonzero but
 * largely unknown still. */
if (texture->offset_unknown1 ||
texture->offset_reg_select ||
texture->offset_reg_upper ||
texture->offset_unknown4 ||
texture->offset_unknown5 ||
texture->offset_unknown6 ||
texture->offset_unknown7 ||
texture->offset_unknown8 ||
texture->offset_unknown9) {
printf("// offset_unknown1 = 0x%x\n", texture->offset_unknown1);
printf("// offset_reg_select = 0x%x\n", texture->offset_reg_select);
printf("// offset_reg_upper = 0x%x\n", texture->offset_reg_upper);
printf("// offset_unknown4 = 0x%x\n", texture->offset_unknown4);
printf("// offset_unknown5 = 0x%x\n", texture->offset_unknown5);
printf("// offset_unknown6 = 0x%x\n", texture->offset_unknown6);
printf("// offset_unknown7 = 0x%x\n", texture->offset_unknown7);
printf("// offset_unknown8 = 0x%x\n", texture->offset_unknown8);
printf("// offset_unknown9 = 0x%x\n", texture->offset_unknown9);
}
/* Don't blow up */
/* unknown7 has been 1 in every instruction seen so far; flag anything else */
if (texture->unknown7 != 0x1)
printf("// (!) unknown7 = %d\n", texture->unknown7);
}
/* Disassemble a Midgard shader binary and print it to stdout.
 *
 * code: start of the binary
 * size: length of the binary in bytes; trailing bytes that do not form a
 *       whole 32-bit word are ignored
 *
 * The binary is a sequence of bundles. The low four bits of a bundle's first
 * word are its tag, which selects both the bundle type and its size in
 * 128-bit quadwords; the next four bits are the tag of the following bundle.
 * Disassembly ends at the end of the buffer, or early when the prefetch
 * logic below decides the shader is over. A bundle that would extend past the
 * end of the buffer is reported and not decoded, so truncated or corrupt
 * input is never read out of bounds. */
void
disassemble_midgard(uint8_t *code, size_t size)
{
        uint32_t *words = (uint32_t *) code;
        unsigned num_words = size / 4;
        int tabs = 0;
        bool prefetch_flag = false;
        unsigned i = 0;

        while (i < num_words) {
                unsigned tag = words[i] & 0xF;
                unsigned num_quad_words = midgard_word_size[tag];

                /* Unknown tags have size 0 in the table and are dumped as a
                 * single quadword, so they still need four words */
                unsigned needed_words = 4 * (num_quad_words ? num_quad_words : 1);

                /* i < num_words here, so the subtraction cannot wrap */
                if (needed_words > num_words - i) {
                        printf("Truncated bundle (tag %u) at word %u: needs %u words, %u left\n",
                               tag, i, needed_words, num_words - i);
                        return;
                }

                switch (midgard_word_types[tag]) {
                case midgard_word_type_texture:
                        print_texture_word(&words[i], tabs);
                        break;

                case midgard_word_type_load_store:
                        print_load_store_word(&words[i], tabs);
                        break;

                case midgard_word_type_alu:
                        print_alu_word(&words[i], num_quad_words, tabs);

                        /* The previous bundle announced the end of the
                         * shader and this ALU bundle was the last one */
                        if (prefetch_flag)
                                return;

                        /* Reset word static analysis state */
                        is_embedded_constant_half = false;
                        is_embedded_constant_int = false;

                        break;

                default:
                        printf("Unknown word type %u:\n", tag);
                        num_quad_words = 1;
                        print_quad_word(&words[i], tabs);
                        printf("\n");
                        break;
                }

                printf("\n");

                /* Tag of the following bundle, from bits 4..7 */
                unsigned next = (words[i] & 0xF0) >> 4;

                i += 4 * num_quad_words;

                /* Break based on instruction prefetch flag: a next-tag of 1
                 * marks the end. If the following bundle is an ALU bundle it
                 * is still printed once more before stopping; anything else
                 * stops immediately */
                if (i < num_words && next == 1) {
                        prefetch_flag = true;

                        if (midgard_word_types[words[i] & 0xF] != midgard_word_type_alu)
                                return;
                }
        }
}

View File

@ -0,0 +1,2 @@
#include <stddef.h>
#include <stdint.h>

/* Disassemble `size` bytes of Midgard shader code starting at `code` and
 * print the result to stdout. <stdint.h> is included because the prototype
 * uses uint8_t, which <stddef.h> does not declare. */
void disassemble_midgard(uint8_t *code, size_t size);

View File

@ -0,0 +1,236 @@
/* Author(s):
* Alyssa Rosenzweig
*
* Copyright (c) 2018 Alyssa Rosenzweig (alyssa@rosenzweig.io)
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
/* Some constants and macros not found in the disassembler */
/* Evaluates to nonzero if `op` is one of the varying store opcodes. The
 * argument is parenthesized so that a compound expression is compared as a
 * whole; note that it may still be evaluated twice. */
#define OP_IS_STORE(op) (\
                ((op) == midgard_op_store_vary_16) || \
                ((op) == midgard_op_store_vary_32) \
        )
/* ALU control words are single bit fields with a lot of space: each flag
 * below is one bit of the 32-bit control word and enables one field of the
 * bundle. The five ALU units sit on the odd bits 17..25; the compact and
 * extended branch fields use bits 26 and 27. */
#define ALU_ENAB_VEC_MUL (1 << 17)
#define ALU_ENAB_SCAL_ADD (1 << 19)
#define ALU_ENAB_VEC_ADD (1 << 21)
#define ALU_ENAB_SCAL_MUL (1 << 23)
#define ALU_ENAB_VEC_LUT (1 << 25)
#define ALU_ENAB_BR_COMPACT (1 << 26)
#define ALU_ENAB_BRANCH (1 << 27)
/* Other opcode properties that don't conflict with the ALU_ENABs, non-ISA */
/* Denotes an opcode that takes a vector input with a fixed number of
 * channels, but outputs to only a single output channel, like dot products.
 * For these, to determine the effective mask, this quirk can be set. We have
 * an intentional off-by-one (a la MALI_POSITIVE), since 0-channel makes no
 * sense but we need to fit 4 channels in 2 bits. Similarly, 1-channel doesn't
 * make sense (since then why are we quirked?), so that corresponds to "no
 * count set".
 *
 * OP_CHANNEL_COUNT encodes a count into the low two bits of a property word;
 * GET_CHANNEL_COUNT decodes it again and yields 0 when no count is set.
 * Arguments are parenthesized so compound expressions are handled as a
 * whole; GET_CHANNEL_COUNT may evaluate its argument twice. */
#define OP_CHANNEL_COUNT(c) (((c) - 1) << 0)
#define GET_CHANNEL_COUNT(c) (((c) & (0x3 << 0)) ? (((c) & (0x3 << 0)) + 1) : 0)
/* Vector-independent shorthands for the above; these numbers are arbitrary and
 * not from the ISA. Convert to the above with unit_enum_to_midgard */
#define UNIT_MUL 0
#define UNIT_ADD 1
#define UNIT_LUT 2
/* 4-bit type tags, as found in the low four bits of a bundle's first word.
 * NOTE(review): the numeric suffix looks like the bundle size in 32-bit words
 * (the ALU tags 0x8..0xB correspond to 1..4 quadwords in midgard_word_size)
 * -- confirm. */
#define TAG_TEXTURE_4 0x3
#define TAG_LOAD_STORE_4 0x5
#define TAG_ALU_4 0x8
#define TAG_ALU_8 0x9
#define TAG_ALU_12 0xA
#define TAG_ALU_16 0xB
/* Special register aliases. Note that some aliases deliberately share a
 * number: REGISTER_UNIFORMS/REGISTER_UNUSED are both 24 and
 * REGISTER_CONSTANT/REGISTER_VARYING_BASE are both 26. */
#define MAX_WORK_REGISTERS 16
/* Uniforms begin at (REGISTER_UNIFORMS - uniform_count) */
#define REGISTER_UNIFORMS 24
#define REGISTER_UNUSED 24
#define REGISTER_CONSTANT 26
#define REGISTER_VARYING_BASE 26
#define REGISTER_OFFSET 27
#define REGISTER_TEXTURE_BASE 28
#define REGISTER_SELECT 31
/* Special uniforms used for e.g. vertex epilogues; they are numbered upwards
 * from SPECIAL_UNIFORM_BASE */
#define SPECIAL_UNIFORM_BASE (1 << 24)
#define UNIFORM_VIEWPORT (SPECIAL_UNIFORM_BASE + 0)
/* SSA helper aliases to mimic the registers. UNUSED_0 encoded as an inline
 * constant. UNUSED_1 encoded as REGISTER_UNUSED.
 *
 * A fixed register is encoded as (register + 1) shifted up by
 * SSA_FIXED_SHIFT; SSA_REG_FROM_FIXED reverses that encoding and
 * SSA_FIXED_MINIMUM is the encoding of register 0. Macro arguments and the
 * negative constant are parenthesized so they are safe inside larger
 * expressions. */
#define SSA_UNUSED_0 0
#define SSA_UNUSED_1 (-2)
#define SSA_FIXED_SHIFT 24
#define SSA_FIXED_REGISTER(reg) ((1 + (reg)) << SSA_FIXED_SHIFT)
#define SSA_REG_FROM_FIXED(reg) (((reg) >> SSA_FIXED_SHIFT) - 1)
#define SSA_FIXED_MINIMUM SSA_FIXED_REGISTER(0)
/* Swizzle support: a swizzle packs four 2-bit component selectors into one
 * byte, with A in the lowest bits and D in the highest. Arguments are
 * parenthesized so compound expressions are packed correctly. */
#define SWIZZLE(A, B, C, D) (((D) << 6) | ((C) << 4) | ((B) << 2) | ((A) << 0))
#define SWIZZLE_FROM_ARRAY(r) SWIZZLE((r)[0], (r)[1], (r)[2], (r)[3])
#define COMPONENT_X 0x0
#define COMPONENT_Y 0x1
#define COMPONENT_Z 0x2
#define COMPONENT_W 0x3
/* See ISA notes. The value equals midgard_op_ld_st_noop (0x03), which is also
 * the raw slot value the disassembler skips as an empty load/store slot. */
#define LDST_NOP (3)
/* Is this opcode that of an integer? Returns true for the integer ALU
 * opcodes listed below and false for everything else. The float<->integer
 * conversions (f2i, f2u, i2f, u2f) are deliberately not listed, so they
 * report false. */
static bool
midgard_is_integer_op(int op)
{
        switch (op) {
        /* Arithmetic */
        case midgard_alu_op_iadd:
        case midgard_alu_op_ishladd:
        case midgard_alu_op_isub:
        case midgard_alu_op_imul:
        case midgard_alu_op_imin:
        case midgard_alu_op_imax:

        /* Shifts */
        case midgard_alu_op_iasr:
        case midgard_alu_op_ilsr:
        case midgard_alu_op_ishl:

        /* Bitwise logic and moves */
        case midgard_alu_op_iand:
        case midgard_alu_op_ior:
        case midgard_alu_op_inot:
        case midgard_alu_op_iandnot:
        case midgard_alu_op_ixor:
        case midgard_alu_op_imov:

        /* Comparisons */
        case midgard_alu_op_ieq:
        case midgard_alu_op_ine:
        case midgard_alu_op_ilt:
        case midgard_alu_op_ile:
        case midgard_alu_op_iball_eq:
        case midgard_alu_op_ibany_neq:

        /* Select */
        case midgard_alu_op_icsel:
                return true;
        }

        return false;
}
/* There are five ALU units: VMUL, VADD, SMUL, SADD, LUT. A given opcode is
 * implemented on some subset of these units (or occasionally all of them).
 * This table encodes a bit mask of valid units for each opcode, so the
 * scheduler can figure where to plonk the instruction. */
/* Shorthands for each unit; each is the matching ALU_ENAB_* control bit */
#define UNIT_VMUL ALU_ENAB_VEC_MUL
#define UNIT_SADD ALU_ENAB_SCAL_ADD
#define UNIT_VADD ALU_ENAB_VEC_ADD
#define UNIT_SMUL ALU_ENAB_SCAL_MUL
#define UNIT_VLUT ALU_ENAB_VEC_LUT
/* Shorthands for usual combinations of units. LUT is intentionally excluded
 * since it's nutty. (UNITS_ANY_VECTOR is the one combination that does
 * include it.) */
#define UNITS_MUL (UNIT_VMUL | UNIT_SMUL)
#define UNITS_ADD (UNIT_VADD | UNIT_SADD)
#define UNITS_ALL (UNITS_MUL | UNITS_ADD)
#define UNITS_SCALAR (UNIT_SADD | UNIT_SMUL)
#define UNITS_VECTOR (UNIT_VMUL | UNIT_VADD)
#define UNITS_ANY_VECTOR (UNITS_VECTOR | UNIT_VLUT)
/* Per-opcode properties, indexed by ALU opcode: a bit mask of the units the
 * opcode may be scheduled on (UNIT_* / UNITS_*), optionally combined with an
 * OP_CHANNEL_COUNT() quirk in the low bits. Opcodes without an entry are
 * zero-initialized, i.e. they have no valid unit. */
static int alu_opcode_props[256] = {
        [midgard_alu_op_fadd]            = UNITS_ADD,
        [midgard_alu_op_fmul]            = UNITS_MUL | UNIT_VLUT,
        [midgard_alu_op_fmin]            = UNITS_ALL,
        [midgard_alu_op_fmax]            = UNITS_ALL,
        [midgard_alu_op_imin]            = UNITS_ALL,
        [midgard_alu_op_imax]            = UNITS_ALL,
        [midgard_alu_op_fmov]            = UNITS_ALL | UNIT_VLUT,
        [midgard_alu_op_ffloor]          = UNITS_ADD,
        [midgard_alu_op_fceil]           = UNITS_ADD,

        /* Though they output a scalar, they need to run on a vector unit
         * since they process vectors */
        [midgard_alu_op_fdot3]           = UNIT_VMUL | OP_CHANNEL_COUNT(3),
        [midgard_alu_op_fdot4]           = UNIT_VMUL | OP_CHANNEL_COUNT(4),

        [midgard_alu_op_iadd]            = UNITS_ADD,
        [midgard_alu_op_isub]            = UNITS_ADD,
        [midgard_alu_op_imul]            = UNITS_ALL,
        [midgard_alu_op_imov]            = UNITS_ALL,

        /* For vector comparisons, use ball etc */
        [midgard_alu_op_feq]             = UNITS_ALL,
        [midgard_alu_op_fne]             = UNITS_ALL,
        [midgard_alu_op_flt]             = UNIT_SADD,
        [midgard_alu_op_ieq]             = UNITS_ALL,
        [midgard_alu_op_ine]             = UNITS_ALL,
        [midgard_alu_op_ilt]             = UNITS_ALL,
        [midgard_alu_op_ile]             = UNITS_ALL,

        [midgard_alu_op_icsel]           = UNITS_ADD,
        [midgard_alu_op_fcsel]           = UNITS_ADD | UNIT_SMUL,

        [midgard_alu_op_frcp]            = UNIT_VLUT,
        [midgard_alu_op_frsqrt]          = UNIT_VLUT,
        [midgard_alu_op_fsqrt]           = UNIT_VLUT,
        [midgard_alu_op_fexp2]           = UNIT_VLUT,
        [midgard_alu_op_flog2]           = UNIT_VLUT,

        [midgard_alu_op_f2i]             = UNITS_ADD,
        [midgard_alu_op_f2u]             = UNITS_ADD,
        [midgard_alu_op_f2u8]            = UNITS_ADD,
        [midgard_alu_op_i2f]             = UNITS_ADD,
        [midgard_alu_op_u2f]             = UNITS_ADD,

        [midgard_alu_op_fsin]            = UNIT_VLUT,
        [midgard_alu_op_fcos]            = UNIT_VLUT,

        [midgard_alu_op_iand]            = UNITS_ADD, /* XXX: Test case where it's right on smul but not sadd */
        [midgard_alu_op_ior]             = UNITS_ADD,
        [midgard_alu_op_ixor]            = UNITS_ADD,
        [midgard_alu_op_inot]            = UNITS_ALL,
        [midgard_alu_op_ishl]            = UNITS_ADD,
        [midgard_alu_op_iasr]            = UNITS_ADD,
        /* Each opcode is initialized exactly once; a repeated designator
         * would silently override the earlier entry */
        [midgard_alu_op_ilsr]            = UNITS_ADD,

        [midgard_alu_op_fball_eq]        = UNITS_ALL,
        [midgard_alu_op_fbany_neq]       = UNITS_ALL,
        [midgard_alu_op_iball_eq]        = UNITS_ALL,
        [midgard_alu_op_ibany_neq]       = UNITS_ALL
};

View File

@ -0,0 +1,70 @@
/* Author(s):
* Connor Abbott
* Alyssa Rosenzweig
*
* Copyright (c) 2013 Connor Abbott (connor@abbott.cx)
* Copyright (c) 2018 Alyssa Rosenzweig (alyssa@rosenzweig.io)
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef __midgard_parse_h__
#define __midgard_parse_h__
/* Additional metadata for parsing Midgard binaries, not needed for compilation */
/* Bundle type for each of the sixteen possible 4-bit tags. Every slot is
 * spelled out because midgard_word_type_unknown is not the zero value. */
static midgard_word_type midgard_word_types[16] = {
        [0x0] = midgard_word_type_unknown,
        [0x1] = midgard_word_type_unknown,

        [0x2] = midgard_word_type_texture,
        [0x3] = midgard_word_type_texture,

        [0x4] = midgard_word_type_unknown,

        [0x5] = midgard_word_type_load_store,

        [0x6] = midgard_word_type_unknown,
        [0x7] = midgard_word_type_unknown,

        /* All tags with the top bit set are ALU bundles */
        [0x8] = midgard_word_type_alu,
        [0x9] = midgard_word_type_alu,
        [0xA] = midgard_word_type_alu,
        [0xB] = midgard_word_type_alu,
        [0xC] = midgard_word_type_alu,
        [0xD] = midgard_word_type_alu,
        [0xE] = midgard_word_type_alu,
        [0xF] = midgard_word_type_alu,
};
/* Bundle size in 128-bit quadwords for each of the sixteen possible 4-bit
 * tags; 0 marks a tag whose size is not known. */
static unsigned midgard_word_size[16] = {
        [0x0] = 0,
        [0x1] = 0,
        [0x2] = 1,
        [0x3] = 1,
        [0x4] = 0,
        [0x5] = 1,
        [0x6] = 0,
        [0x7] = 0,

        /* ALU bundles: the low two bits of the tag give the size minus one */
        [0x8] = 1,
        [0x9] = 2,
        [0xA] = 3,
        [0xB] = 4,
        [0xC] = 1,
        [0xD] = 2,
        [0xE] = 3,
        [0xF] = 4,
};
#endif

View File

@ -0,0 +1,473 @@
/* Author(s):
* Connor Abbott
* Alyssa Rosenzweig
*
* Copyright (c) 2013 Connor Abbott (connor@abbott.cx)
* Copyright (c) 2018 Alyssa Rosenzweig (alyssa@rosenzweig.io)
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef __midgard_h__
#define __midgard_h__
#include <stdint.h>
#include <stdbool.h>
/* Broad category of an instruction bundle, as selected by its tag */
typedef enum {
        midgard_word_type_alu        = 0,
        midgard_word_type_load_store = 1,
        midgard_word_type_texture    = 2,
        midgard_word_type_unknown    = 3
} midgard_word_type;
/* Identifiers for the five ALU units */
typedef enum {
        midgard_alu_vmul = 0,
        midgard_alu_sadd = 1,
        midgard_alu_smul = 2,
        midgard_alu_vadd = 3,
        midgard_alu_lut  = 4
} midgard_alu;
/*
* ALU words
*/
/* ALU opcodes. The values are the raw opcode numbers; they are stored in the
 * 8-bit op field of the vector and scalar ALU instruction structs and are
 * used directly as indices into the 256-entry opcode tables. */
typedef enum {
midgard_alu_op_fadd = 0x10,
midgard_alu_op_fmul = 0x14,
midgard_alu_op_fmin = 0x28,
midgard_alu_op_fmax = 0x2C,
midgard_alu_op_fmov = 0x30,
midgard_alu_op_ffloor = 0x36,
midgard_alu_op_fceil = 0x37,
midgard_alu_op_fdot3 = 0x3C,
midgard_alu_op_fdot3r = 0x3D,
midgard_alu_op_fdot4 = 0x3E,
midgard_alu_op_freduce = 0x3F,
midgard_alu_op_iadd = 0x40,
midgard_alu_op_ishladd = 0x41,
midgard_alu_op_isub = 0x46,
midgard_alu_op_imul = 0x58,
midgard_alu_op_imin = 0x60,
midgard_alu_op_imax = 0x62,
midgard_alu_op_iasr = 0x68,
midgard_alu_op_ilsr = 0x69,
midgard_alu_op_ishl = 0x6E,
midgard_alu_op_iand = 0x70,
midgard_alu_op_ior = 0x71,
midgard_alu_op_inot = 0x72,
midgard_alu_op_iandnot = 0x74, /* (a, b) -> a & ~b, used for not/b2f */
midgard_alu_op_ixor = 0x76,
midgard_alu_op_imov = 0x7B,
midgard_alu_op_feq = 0x80,
midgard_alu_op_fne = 0x81,
midgard_alu_op_flt = 0x82,
midgard_alu_op_fle = 0x83,
midgard_alu_op_fball_eq = 0x88,
midgard_alu_op_bball_eq = 0x89,
midgard_alu_op_bbany_neq = 0x90, /* used for bvec4(1) */
midgard_alu_op_fbany_neq = 0x91, /* bvec4(0) also */
midgard_alu_op_f2i = 0x99,
midgard_alu_op_f2u8 = 0x9C,
midgard_alu_op_f2u = 0x9D,
midgard_alu_op_ieq = 0xA0,
midgard_alu_op_ine = 0xA1,
midgard_alu_op_ilt = 0xA4,
midgard_alu_op_ile = 0xA5,
midgard_alu_op_iball_eq = 0xA8,
midgard_alu_op_ball = 0xA9,
midgard_alu_op_ibany_neq = 0xB1,
midgard_alu_op_i2f = 0xB8,
midgard_alu_op_u2f = 0xBC,
midgard_alu_op_icsel = 0xC1,
midgard_alu_op_fcsel = 0xC5,
midgard_alu_op_fatan_pt2 = 0xE8,
midgard_alu_op_frcp = 0xF0,
midgard_alu_op_frsqrt = 0xF2,
midgard_alu_op_fsqrt = 0xF3,
midgard_alu_op_fexp2 = 0xF4,
midgard_alu_op_flog2 = 0xF5,
midgard_alu_op_fsin = 0xF6,
midgard_alu_op_fcos = 0xF7,
midgard_alu_op_fatan2_pt1 = 0xF9,
} midgard_alu_op;
/* Output modifier, stored in the 2-bit outmod field of ALU instructions.
 * NOTE(review): going by the names, pos/int/sat presumably clamp to
 * non-negative, convert to integer and saturate respectively -- confirm
 * against the ISA notes. */
typedef enum {
midgard_outmod_none = 0,
midgard_outmod_pos = 1,
midgard_outmod_int = 2,
midgard_outmod_sat = 3
} midgard_outmod;
/* Register width mode, stored in the 2-bit reg_mode field of vector ALU
 * instructions. */
typedef enum {
midgard_reg_mode_quarter = 0,
midgard_reg_mode_half = 1,
midgard_reg_mode_full = 2,
midgard_reg_mode_double = 3 /* TODO: verify */
} midgard_reg_mode;
/* Destination override, stored in the 2-bit dest_override field of vector ALU
 * instructions. NOTE(review): presumably selects which half of the
 * destination is written when the result is narrower than the register --
 * confirm. */
typedef enum {
midgard_dest_override_lower = 0,
midgard_dest_override_upper = 1,
midgard_dest_override_none = 2
} midgard_dest_override;
/* Source operand modifiers of a vector ALU instruction. The fields add up to
 * 13 bits, the width of the src1/src2 fields of midgard_vector_alu. */
typedef struct
__attribute__((__packed__))
{
bool abs : 1;
bool negate : 1;
/* replicate lower half if dest = half, or low/high half selection if
 * dest = full
 */
bool rep_low : 1;
bool rep_high : 1; /* unused if dest = full */
bool half : 1; /* only matters if dest = full */
unsigned swizzle : 8;
}
midgard_vector_alu_src;
/* Vector ALU instruction body. The fields add up to 48 bits, i.e. the three
 * 16-bit words the disassembler steps over per vector field. src1 and src2
 * are 13 bits each, the same width as midgard_vector_alu_src. */
typedef struct
__attribute__((__packed__))
{
midgard_alu_op op : 8;
midgard_reg_mode reg_mode : 2;
unsigned src1 : 13;
unsigned src2 : 13;
midgard_dest_override dest_override : 2;
midgard_outmod outmod : 2;
unsigned mask : 8;
}
midgard_vector_alu;
/* Source operand modifiers of a scalar ALU instruction. The fields add up to
 * 6 bits, the width of the src1 field of midgard_scalar_alu. */
typedef struct
__attribute__((__packed__))
{
bool abs : 1;
bool negate : 1;
bool full : 1; /* 0 = half, 1 = full */
unsigned component : 3;
}
midgard_scalar_alu_src;
/* Scalar ALU instruction body. The fields add up to 32 bits, i.e. the two
 * 16-bit words the disassembler steps over per scalar field. Note that src1
 * is 6 bits wide but src2 is 11.
 * NOTE(review): the wider src2 presumably leaves room for an inline
 * immediate -- confirm. */
typedef struct
__attribute__((__packed__))
{
midgard_alu_op op : 8;
unsigned src1 : 6;
unsigned src2 : 11;
unsigned unknown : 1;
midgard_outmod outmod : 2;
bool output_full : 1;
unsigned output_component : 3;
}
midgard_scalar_alu;
/* Register selection for one ALU instruction: two source registers, one
 * output register and a flag marking src2 as an immediate. The fields add up
 * to 16 bits.
 * NOTE(review): presumably this is the layout of the per-field 16-bit word
 * that follows the ALU control word -- confirm. */
typedef struct
__attribute__((__packed__))
{
unsigned src1_reg : 5;
unsigned src2_reg : 5;
unsigned out_reg : 5;
bool src2_imm : 1;
}
midgard_reg_info;
/* Operation of a branch/writeout field; stored in the 3-bit op field that
 * opens each of the branch and writeout structs. */
typedef enum {
midgard_jmp_writeout_op_branch_uncond = 1,
midgard_jmp_writeout_op_branch_cond = 2,
midgard_jmp_writeout_op_discard = 4,
midgard_jmp_writeout_op_writeout = 7,
} midgard_jmp_writeout_op;
/* Branch condition, stored in the 2-bit cond field of midgard_branch_cond */
typedef enum {
midgard_condition_write0 = 0,
midgard_condition_false = 1,
midgard_condition_true = 2,
midgard_condition_always = 3, /* Special for writeout/uncond discard */
} midgard_condition;
/* Compact unconditional branch: 16 bits in total, with a signed 7-bit
 * offset. */
typedef struct
__attribute__((__packed__))
{
midgard_jmp_writeout_op op : 3; /* == branch_uncond */
unsigned dest_tag : 4; /* tag of branch destination */
unsigned unknown : 2;
int offset : 7;
}
midgard_branch_uncond;
/* Compact conditional branch: 16 bits in total. Compared with the
 * unconditional form, the two unknown bits are gone and a 2-bit condition
 * follows the signed 7-bit offset. */
typedef struct
__attribute__((__packed__))
{
midgard_jmp_writeout_op op : 3; /* == branch_cond */
unsigned dest_tag : 4; /* tag of branch destination */
int offset : 7;
midgard_condition cond : 2;
}
midgard_branch_cond;
/* Extended branch: 48 bits in total, i.e. the three 16-bit words the
 * disassembler steps over for an extended branch field. The condition is a
 * 16-bit field rather than the 2-bit enum of the compact form. */
typedef struct
__attribute__((__packed__))
{
midgard_jmp_writeout_op op : 3; /* == branch_cond */
unsigned dest_tag : 4; /* tag of branch destination */
unsigned unknown : 2;
signed offset : 7;
unsigned zero : 16;
unsigned cond : 16;
}
midgard_branch_extended;
/* Writeout field: 16 bits in total, of which only the 3-bit op is
 * understood. */
typedef struct
__attribute__((__packed__))
{
midgard_jmp_writeout_op op : 3; /* == writeout */
unsigned unknown : 13;
}
midgard_writeout;
/*
* Load/store words
*/
/* Load/store opcodes. The values are the raw opcode numbers stored in the
 * 8-bit op field of midgard_load_store_word; the _8/_16/_32 suffixes are part
 * of the opcode names as given here. */
typedef enum {
midgard_op_ld_st_noop = 0x03,
midgard_op_load_attr_16 = 0x95,
midgard_op_load_attr_32 = 0x94,
midgard_op_load_vary_16 = 0x99,
midgard_op_load_vary_32 = 0x98,
midgard_op_load_color_buffer_16 = 0x9D,
midgard_op_load_color_buffer_8 = 0xBA,
midgard_op_load_uniform_16 = 0xAC,
midgard_op_load_uniform_32 = 0xB0,
midgard_op_store_vary_16 = 0xD5,
midgard_op_store_vary_32 = 0xD4
} midgard_load_store_op;
/* Interpolation qualifier of a varying, stored in the 2-bit interpolation
 * field of midgard_varying_parameter. Only two of the four possible values
 * are named. */
typedef enum {
midgard_interp_centroid = 1,
midgard_interp_default = 2
} midgard_interpolation;
/* Layout of the varying_parameters field of a load/store word. The fields
 * add up to 10 bits, the width of that field. */
typedef struct
__attribute__((__packed__))
{
unsigned zero1 : 4; /* Always zero */
/* Varying qualifiers, zero if not a varying */
unsigned flat : 1;
unsigned is_varying : 1; /* Always one for varying, but maybe something else? */
midgard_interpolation interpolation : 2;
unsigned zero2 : 2; /* Always zero */
}
midgard_varying_parameter;
/* One load/store instruction. The fields add up to 60 bits, the width of the
 * word1/word2 slots of midgard_load_store. varying_parameters is decoded
 * through midgard_varying_parameter. */
typedef struct
__attribute__((__packed__))
{
midgard_load_store_op op : 8;
unsigned reg : 5;
unsigned mask : 4;
unsigned swizzle : 8;
unsigned unknown : 16;
unsigned varying_parameters : 10;
unsigned address : 9;
}
midgard_load_store_word;
/* A load/store bundle: the 4-bit tag, the 4-bit tag of the next bundle, and
 * two 60-bit instruction slots, 128 bits in total. Each slot has the layout
 * of midgard_load_store_word. */
typedef struct
__attribute__((__packed__))
{
unsigned type : 4;
unsigned next_type : 4;
uint64_t word1 : 60;
uint64_t word2 : 60;
}
midgard_load_store;
/* Texture pipeline results are in r28-r29 */
#define REG_TEX_BASE 28
/* Texture opcodes... maybe? Values of the 6-bit op field of
 * midgard_texture_word */
#define TEXTURE_OP_NORMAL 0x11
#define TEXTURE_OP_TEXEL_FETCH 0x14
/* Texture format types, found in format (the 5-bit format field of
 * midgard_texture_word) */
#define TEXTURE_CUBE 0x00
#define TEXTURE_2D 0x02
#define TEXTURE_3D 0x03
/* A texture instruction; the fields add up to 128 bits, i.e. one quadword.
 * Fields named unknown* / offset_unknown* have no known meaning yet. There
 * is no in_reg_full field: only select/upper/swizzle exist for the input
 * register. */
typedef struct
__attribute__((__packed__))
{
unsigned type : 4;
unsigned next_type : 4;
unsigned op : 6;
unsigned shadow : 1;
unsigned unknown3 : 1;
/* A little obscure, but last is set for the last texture operation in
 * a shader. cont appears to just be last's opposite (?). Yeah, I know,
 * kind of funky.. BiOpen thinks it could do with memory hinting, or
 * tile locking? */
unsigned cont : 1;
unsigned last : 1;
unsigned format : 5;
unsigned has_offset : 1;
/* Like in Bifrost */
unsigned filter : 1;
unsigned in_reg_select : 1;
unsigned in_reg_upper : 1;
unsigned in_reg_swizzle_left : 2;
unsigned in_reg_swizzle_right : 2;
unsigned unknown1 : 2;
unsigned unknown8 : 4;
unsigned out_full : 1;
/* Always 1 afaict... */
unsigned unknown7 : 2;
unsigned out_reg_select : 1;
unsigned out_upper : 1;
unsigned mask : 4;
unsigned unknown2 : 2;
unsigned swizzle : 8;
unsigned unknown4 : 8;
unsigned unknownA : 4;
unsigned offset_unknown1 : 1;
unsigned offset_reg_select : 1;
unsigned offset_reg_upper : 1;
unsigned offset_unknown4 : 1;
unsigned offset_unknown5 : 1;
unsigned offset_unknown6 : 1;
unsigned offset_unknown7 : 1;
unsigned offset_unknown8 : 1;
unsigned offset_unknown9 : 1;
unsigned unknownB : 3;
/* Texture bias or LOD, depending on whether it is executed in a
 * fragment/vertex shader respectively. Compute as int(2^8 * biasf).
 *
 * For texel fetch, this is the LOD as is. */
unsigned bias : 8;
unsigned unknown9 : 8;
unsigned texture_handle : 16;
unsigned sampler_handle : 16;
}
midgard_texture_word;
/* Opcode name table: mnemonic for each ALU opcode, indexed by opcode value.
 * Entries are listed in ascending opcode order; opcodes without a name are
 * NULL. */
static char *alu_opcode_names[256] = {
        /* 0x10 - 0x3F: float arithmetic, moves and reductions */
        [midgard_alu_op_fadd]       = "fadd",
        [midgard_alu_op_fmul]       = "fmul",
        [midgard_alu_op_fmin]       = "fmin",
        [midgard_alu_op_fmax]       = "fmax",
        [midgard_alu_op_fmov]       = "fmov",
        [midgard_alu_op_ffloor]     = "ffloor",
        [midgard_alu_op_fceil]      = "fceil",
        [midgard_alu_op_fdot3]      = "fdot3",
        [midgard_alu_op_fdot3r]     = "fdot3r",
        [midgard_alu_op_fdot4]      = "fdot4",
        [midgard_alu_op_freduce]    = "freduce",

        /* 0x40 - 0x7B: integer arithmetic, shifts and bitwise logic */
        [midgard_alu_op_iadd]       = "iadd",
        [midgard_alu_op_ishladd]    = "ishladd",
        [midgard_alu_op_isub]       = "isub",
        [midgard_alu_op_imul]       = "imul",
        [midgard_alu_op_imin]       = "imin",
        [midgard_alu_op_imax]       = "imax",
        [midgard_alu_op_iasr]       = "iasr",
        [midgard_alu_op_ilsr]       = "ilsr",
        [midgard_alu_op_ishl]       = "ishl",
        [midgard_alu_op_iand]       = "iand",
        [midgard_alu_op_ior]        = "ior",
        [midgard_alu_op_inot]       = "inot",
        [midgard_alu_op_iandnot]    = "iandnot",
        [midgard_alu_op_ixor]       = "ixor",
        [midgard_alu_op_imov]       = "imov",

        /* 0x80 - 0x91: float and boolean comparisons */
        [midgard_alu_op_feq]        = "feq",
        [midgard_alu_op_fne]        = "fne",
        [midgard_alu_op_flt]        = "flt",
        [midgard_alu_op_fle]        = "fle",
        [midgard_alu_op_fball_eq]   = "fball_eq",
        [midgard_alu_op_bball_eq]   = "bball_eq",
        [midgard_alu_op_bbany_neq]  = "bbany_neq",
        [midgard_alu_op_fbany_neq]  = "fbany_neq",

        /* 0x99 - 0x9D: float to integer conversions */
        [midgard_alu_op_f2i]        = "f2i",
        [midgard_alu_op_f2u8]       = "f2u8",
        [midgard_alu_op_f2u]        = "f2u",

        /* 0xA0 - 0xB1: integer comparisons */
        [midgard_alu_op_ieq]        = "ieq",
        [midgard_alu_op_ine]        = "ine",
        [midgard_alu_op_ilt]        = "ilt",
        [midgard_alu_op_ile]        = "ile",
        [midgard_alu_op_iball_eq]   = "iball_eq",
        [midgard_alu_op_ball]       = "ball",
        [midgard_alu_op_ibany_neq]  = "ibany_neq",

        /* 0xB8 - 0xBC: integer to float conversions */
        [midgard_alu_op_i2f]        = "i2f",
        [midgard_alu_op_u2f]        = "u2f",

        /* 0xC1 - 0xC5: selects */
        [midgard_alu_op_icsel]      = "icsel",
        [midgard_alu_op_fcsel]      = "fcsel",

        /* 0xE8 - 0xF9: transcendentals and friends */
        [midgard_alu_op_fatan_pt2]  = "fatan_pt2",
        [midgard_alu_op_frcp]       = "frcp",
        [midgard_alu_op_frsqrt]     = "frsqrt",
        [midgard_alu_op_fsqrt]      = "fsqrt",
        [midgard_alu_op_fexp2]      = "fexp2",
        [midgard_alu_op_flog2]      = "flog2",
        [midgard_alu_op_fsin]       = "fsin",
        [midgard_alu_op_fcos]       = "fcos",
        [midgard_alu_op_fatan2_pt1] = "fatan2_pt1"
};
/* Mnemonic for each load/store opcode, indexed by opcode value and listed in
 * ascending opcode order. Opcodes without a name (including the no-op) are
 * NULL. */
static char *load_store_opcode_names[256] = {
        /* Loads */
        [midgard_op_load_attr_32]         = "ld_attr_32",
        [midgard_op_load_attr_16]         = "ld_attr_16",
        [midgard_op_load_vary_32]         = "ld_vary_32",
        [midgard_op_load_vary_16]         = "ld_vary_16",
        [midgard_op_load_color_buffer_16] = "ld_color_buffer_16",
        [midgard_op_load_uniform_16]      = "ld_uniform_16",
        [midgard_op_load_uniform_32]      = "ld_uniform_32",
        [midgard_op_load_color_buffer_8]  = "ld_color_buffer_8",

        /* Stores */
        [midgard_op_store_vary_32]        = "st_vary_32",
        [midgard_op_store_vary_16]        = "st_vary_16"
};
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,80 @@
/*
* Copyright (C) 2018 Alyssa Rosenzweig <alyssa@rosenzweig.io>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "compiler/nir/nir.h"
#include "util/u_dynarray.h"
/* Define the general compiler entry point */
/* Everything midgard_compile_shader_nir() exchanges with its caller about one
 * program. Only alpha_ref is explicitly marked as an input; NOTE(review): the
 * remaining fields are presumably filled in by the compiler -- confirm. */
typedef struct {
/* Resource counts of the program */
int work_register_count;
int uniform_count;
int uniform_cutoff;
int attribute_count;
int varying_count;
/* Boolean properties of the program */
bool can_discard;
bool writes_point_size;
/* NOTE(review): presumably the tag of the first instruction bundle --
 * confirm */
int first_tag;
/* The compiled binary */
struct util_dynarray compiled;
/* For a blend shader using a constant color -- patch point. If
 * negative, there's no constant. */
int blend_patch_offset;
/* IN: For a fragment shader with a lowered alpha test, the ref value */
float alpha_ref;
} midgard_program;
/* Compile the NIR shader `nir`, placing the results in `program`. `is_blend`
 * marks the shader as a blend shader.
 * NOTE(review): the meaning of the int return value is not visible from this
 * header -- confirm against the implementation. */
int
midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_blend);
/* NIR options are shared between the standalone compiler and the online
 * compiler. Defining it here is the simplest, though maybe not the Right
 * solution. */
static const nir_shader_compiler_options midgard_nir_options = {
        /* Float operations to be lowered */
        .lower_ffma = true,
        .lower_fpow = true,
        .lower_ffract = true,
        .lower_fdiv = true,
        .lower_flrp32 = true,
        .lower_flrp64 = true,
        .lower_fmod32 = true,
        .lower_fmod64 = true,

        /* Integer and miscellaneous operations to be lowered */
        .lower_sub = true,
        .lower_scmp = true,
        .lower_idiv = true,
        .lower_extract_byte = true,
        .lower_extract_word = true,

        /* Remaining properties */
        .vertex_id_zero_based = true,
        .native_integers = true
};

View File

@ -0,0 +1,5 @@
#include <stdbool.h>
#include "nir.h"
/* Entry points of the two algebraic passes; the names match the
 * AlgebraicPass names printed by midgard_nir_algebraic.py.
 * NOTE(review): the bool result presumably reports whether the shader was
 * changed -- confirm against the generated code. */
bool midgard_nir_lower_algebraic(nir_shader *shader);
bool midgard_nir_scale_trig(nir_shader *shader);

View File

@ -0,0 +1,71 @@
#
# Copyright (C) 2018 Alyssa Rosenzweig
#
# Copyright (C) 2016 Intel Corporation
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
import argparse
import sys
import math
# Pattern variables referenced by the rewrite rules below.
a = 'a'
b = 'b'
# Rules rendered into the "midgard_nir_lower_algebraic" pass. Each entry is a
# (search, replace) pair of NIR expression tuples.
algebraic = [
(('b2i32', a), ('iand@32', "a@32", 1)),
# Clamp to [-1, 1]: max(a, -1) followed by min(..., 1).
(('isign', a), ('imin', ('imax', a, -1), 1)),
# NOTE(review): fge(a, b) is a >= b, whereas flt(b, a) is b < a (a > b); the
# two differ when a == b. Looks like a bug -- confirm the intended lowering.
(('fge', a, b), ('flt', b, a)),
# XXX: We have hw ops for this, just unknown atm..
#(('fsign@32', a), ('i2f32@32', ('isign', ('f2i32@32', ('fmul', a, 0x43800000)))))
#(('fsign', a), ('fcsel', ('fge', a, 0), 1.0, ('fcsel', ('flt', a, 0.0), -1.0, 0.0)))
# Selects 1.0 when a >= 0 and -1.0 otherwise, so a zero input yields 1.0
# rather than 0.0 (unlike the commented-out three-way variant above).
(('fsign', a), ('bcsel', ('fge', a, 0), 1.0, -1.0)),
]
# Midgard scales fsin/fcos arguments by pi.
# Pass must be run only once, after the main loop
# Each rule divides the argument by pi to compensate. The replacement is
# itself an fsin/fcos, so it matches its own search pattern again; running
# the pass a second time would divide by pi a second time.
scale_trig = [
(('fsin', a), ('fsin', ('fdiv', a, math.pi))),
(('fcos', a), ('fcos', ('fdiv', a, math.pi))),
]
def main():
    """Parse the command line, expose the NIR helper directory, and generate.

    The required ``-p`` / ``--import-path`` option names the directory that
    is put at the front of ``sys.path`` so that ``run()`` can import
    ``nir_algebraic`` from it.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('-p', '--import-path', required=True)
    options = cli.parse_args()

    # Prepend so this directory wins over anything already on the path.
    sys.path.insert(0, options.import_path)
    run()
def run():
    """Print the generated C source for both Midgard algebraic passes.

    Output goes to stdout: first the ``midgard_nir.h`` include line, then the
    rendered ``midgard_nir_lower_algebraic`` pass, then the rendered
    ``midgard_nir_scale_trig`` pass.
    """
    # Imported here because the module only becomes importable after main()
    # has extended sys.path.
    import nir_algebraic  # pylint: disable=import-error

    print('#include "midgard_nir.h"')

    lowering_pass = nir_algebraic.AlgebraicPass(
        "midgard_nir_lower_algebraic", algebraic)
    print(lowering_pass.render())

    trig_pass = nir_algebraic.AlgebraicPass(
        "midgard_nir_scale_trig", scale_trig)
    print(trig_pass.render())
# Generate only when executed as a script; importing the module merely
# defines the rule tables and helpers.
if __name__ == '__main__':
    main()