panfrost: Implement Midgard shader toolchain

This patch implements the free Midgard shader toolchain: the assembler, the disassembler, and the NIR-based compiler. The assembler is a standalone inaccessible Python script for reference purposes. The disassembler and the compiler are implemented in C, accessible via the standalone `midgard_compiler` binary. Later patches will use these interfaces from the driver for online compilation.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Acked-by: Jason Ekstrand <jason@jlekstrand.net>
Acked-by: Rob Clark <robdclark@gmail.com>
Acked-by: Eric Anholt <eric@anholt.net>
Acked-by: Emil Velikov <emil.velikov@collabora.com>
2019-01-30 01:11:31 +00:00 · 2019-01-30 01:11:31 +00:00 · e67e072637
parent 61d3ae6e0b
commit e67e072637
13 changed files with 6383 additions and 2 deletions
--- a/src/gallium/drivers/panfrost/meson.build
+++ b/src/gallium/drivers/panfrost/meson.build
@ -23,6 +23,10 @@ files_panfrost = files(
  'pan_public.h',
  'pan_screen.c',
  'pan_screen.h',
+
+  'midgard/midgard_compile.c',
+  'midgard/cppwrap.cpp',
+  'midgard/disassemble.c',
 )

 inc_panfrost = [
@ -32,12 +36,25 @@ inc_panfrost = [
  inc_drm_uapi,
  inc_include,
  inc_src,
-  include_directories('include')
+  include_directories('include'),
+  include_directories('midgard'),
 ]

+# Generate midgard_nir_algebraic.c by running midgard_nir_algebraic.py with
+# the project's Python; the script's stdout is captured as the output file.
+# The -p argument hands the script the path to src/compiler/nir/ (presumably
+# so it can import nir_algebraic from there -- confirm against the script).
+midgard_nir_algebraic_c = custom_target(
+  'midgard_nir_algebraic.c',
+  input : 'midgard/midgard_nir_algebraic.py',
+  output : 'midgard_nir_algebraic.c',
+  command : [
+    prog_python, '@INPUT@',
+    '-p', join_paths(meson.source_root(), 'src/compiler/nir/'),
+  ],
+  capture : true,
+  depend_files : nir_algebraic_py,
+)
+
 libpanfrost = static_library(
  'panfrost',
-  [files_panfrost],
+  [files_panfrost, midgard_nir_algebraic_c],
  dependencies: [
    dep_thread,
    idep_nir
@ -50,3 +67,26 @@ driver_panfrost = declare_dependency(
  compile_args : ['-DGALLIUM_PANFROST', '-Wno-pointer-arith'],
  link_with : [libpanfrost, libpanfrostwinsys],
 )
+
+# Sources for the standalone midgard_compiler tool: the same three midgard
+# sources listed in files_panfrost, plus the command-line front end.
+files_midgard = files(
+  'midgard/midgard_compile.c',
+  'midgard/cppwrap.cpp',
+  'midgard/disassemble.c',
+  'midgard/cmdline.c',
+)
+
+# Standalone command-line compiler/disassembler.  It compiles the midgard
+# sources (and the generated algebraic pass) directly instead of linking
+# libpanfrost, and is always built (build_by_default).
+midgard_compiler = executable(
+  'midgard_compiler',
+  [files_midgard, midgard_nir_algebraic_c],
+  include_directories : inc_panfrost,
+  dependencies : [
+    dep_thread,
+    idep_nir
+  ],
+  link_with : [
+    libgallium,
+    libglsl_standalone,
+    libmesa_util
+  ],
+  build_by_default : true
+)
--- a/src/gallium/drivers/panfrost/midgard/assemble.py
+++ b/src/gallium/drivers/panfrost/midgard/assemble.py
@ -0,0 +1,643 @@
+"""
+Copyright (C) 2018 Alyssa Rosenzweig
+Copyright (c) 2013 Connor Abbott (connor@abbott.cx)
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+"""
+
+import sys
+import pprint
+import struct
+
+program = []
+
+# Definitions from cwabbott's tools
+
+t6xx_alu_ops = {
+    "fadd":  0x10,
+    "fmul":  0x14,
+    "fmin":  0x28,
+    "fmax":  0x2C,
+    "fmov":  0x30,
+    "ffloor":  0x36,
+    "fceil":  0x37,
+    "fdot3":  0x3C,
+    "fdot3r":  0x3D,
+    "fdot4":  0x3E,
+    "freduce":  0x3F,
+    "iadd":  0x40,
+    "isub":  0x46,
+    "imul":  0x58,
+    "imov":  0x7B,
+    "feq":  0x80,
+    "fne":  0x81,
+    "flt":  0x82,
+    "fle":  0x83,
+    "f2i":  0x99,
+    "f2u8":  0x9C,
+    "u2f": 0xBC,
+    "ieq":  0xA0,
+    "ine":  0xA1,
+    "ilt":  0xA4,
+    "ile":  0xA5,
+    "iand": 0x70,
+    "ior": 0x71,
+    "inot": 0x72,
+    "iandnot": 0x74,
+    "ixor": 0x76,
+    "ball":  0xA9,
+    "bany":  0xB1,
+    "i2f":  0xB8,
+    "csel":  0xC5,
+    "fatan_pt2":  0xE8,
+    "frcp":  0xF0,
+    "frsqrt":  0xF2,
+    "fsqrt":  0xF3,
+    "fexp2":  0xF4,
+    "flog2":  0xF5,
+    "fsin":  0xF6,
+    "fcos":  0xF7,
+    "fatan2_pt1":  0xF9,
+}
+
+t6xx_alu_bits = {
+        "vmul": 17,
+        "sadd": 19,
+        "vadd": 21,
+        "smul": 23,
+        "lut": 25,
+        "br": 26,
+        "branch": 27,
+        "constants": 32
+}
+
+t6xx_alu_size_bits = {
+        "vmul": 48,
+        "sadd": 32,
+        "vadd": 48,
+        "smul": 32,
+        "lut": 48,
+        "br": 16,
+        "branch": 48
+}
+
+t6xx_outmod = {
+        "none": 0,
+        "pos": 1,
+        "int": 2,
+        "sat": 3
+}
+
+t6xx_reg_mode = {
+    "quarter": 0,
+    "half": 1,
+    "full": 2,
+    "double": 3
+}
+
+t6xx_dest_override = {
+    "lower": 0,
+    "upper": 1,
+    "none": 2
+}
+
+t6xx_load_store_ops = {
+    "ld_st_noop":  0x03,
+    "ld_attr_16":  0x95,
+    "ld_attr_32":  0x94,
+    "ld_vary_16":  0x99,
+    "ld_vary_32":  0x98,
+    "ld_uniform_16":  0xAC,
+    "ld_uniform_32":  0xB0,
+    "st_vary_16":  0xD5,
+    "st_vary_32":  0xD4,
+    "ld_color_buffer_8": 0xBA
+}
+
+t6xx_tag = {
+        "texture": 0x3,
+        "load_store": 0x5,
+        "alu4": 0x8,
+        "alu8": 0x9,
+        "alu12": 0xA,
+        "alu16": 0xB,
+}
+
+def is_tag_alu(tag):
+    return (tag >= t6xx_tag["alu4"]) and (tag <= t6xx_tag["alu16"])
+
+# Just an enum
+
+ALU = 0
+LDST = 1
+TEXTURE = 2
+
+# Constant types supported, mapping the constant prefix to the Python format
+# string and the coercion function
+
+constant_types = {
+        "f": ("f", float),
+        "h": ("e", float),
+        "i": ("i", int),
+        "s": ("h", int)
+}
+
+compact_branch_op = {
+        "jump": 1,
+        "branch": 2,
+        "discard": 4,
+        "write": 7
+}
+
+branch_condition = {
+        "false": 1,
+        "true": 2,
+        "always": 3,
+}
+
+# TODO: What else?
+
+texture_op = {
+        "normal": 0x11,
+        "texelfetch": 0x14
+}
+
+texture_fmt = {
+        "2d": 0x02,
+        "3d": 0x03
+}
+	
+with open(sys.argv[1], "r") as f:
+    for ln in f:
+        space = ln.strip().split(" ")
+
+        instruction = space[0]
+        rest = " ".join(space[1:])
+
+        arguments = [s.strip() for s in rest.split(",")]
+        program += [(instruction, arguments)]
+
+swizzle_component = {
+        "x": 0,
+        "y": 1,
+        "z": 2,
+        "w": 3
+}
+
+def decode_reg_name(reg_name):
+    ireg = 0
+    upper = False
+    half = False
+
+    if reg_name[0] == 'r':
+        ireg = int(reg_name[1:])
+    elif reg_name[0] == 'h':
+        rreg = int(reg_name[2:])
+
+        # Decode half-register into its full register's half
+        ireg = rreg >> 1
+        upper = rreg & 1
+        half = True
+    else:
+        # Special case for load/store addresses
+        ireg = int(reg_name)
+
+    return (ireg, half, upper)
+
+def standard_swizzle_from_parts(swizzle_parts):
+    swizzle_s = swizzle_parts[1] if len(swizzle_parts) > 1 else "xyzw"
+
+    swizzle = 0
+    for (i, c) in enumerate(swizzle_s):
+        swizzle |= swizzle_component[c] << (2 * i)
+
+    return swizzle
+
+def mask_from_parts(mask_parts, large_mask):
+    mask_s = mask_parts[1] if len(mask_parts) > 1 else "xyzw"
+
+    if large_mask:
+        mask = sum([(3 << (2*swizzle_component[c]) if c in mask_s else 0) for c in "xyzw"])
+    else:
+        mask = sum([(1 << swizzle_component[c] if c in mask_s else 0) for c in "xyzw"])
+
+    return (mask, mask_s)
+
+def decode_reg(reg):
+    if reg[0] == "#":
+        # Not actually a register, instead an immediate float
+        return (True, struct.unpack("H", struct.pack("e", float(reg[1:])))[0], 0, 0, 0, 0)
+
+    # Function call syntax used in abs() modifier
+    if reg[-1] == ')':
+        reg = reg[:-1]
+
+    swizzle_parts = reg.split(".")
+
+    reg_name = swizzle_parts[0]
+
+    modifiers = 0
+
+    if reg_name[0] == '-':
+        modifiers |= 2
+        reg_name = reg_name[1:]
+
+    if reg_name[0] == 'a':
+        modifiers |= 1
+        reg_name = reg_name[len("abs("):]
+    
+    (ireg, half, upper) = decode_reg_name(reg_name)
+
+    return (False, ireg, standard_swizzle_from_parts(swizzle_parts), half, upper, modifiers)
+
+def decode_masked_reg(reg, large_mask):
+    mask_parts = reg.split(".")
+
+    reg_name = mask_parts[0]
+    (ireg, half, upper) = decode_reg_name(reg_name)
+    (mask, mask_s) = mask_from_parts(mask_parts, large_mask)
+
+    component = max([0] + [swizzle_component[c] for c in "xyzw" if c in mask_s])
+
+    return (ireg, mask, component, half, upper)
+
+# TODO: Fill these in XXX
+
+# Texture pipeline registers in r28-r29
+TEXTURE_BASE = 28
+
+def decode_texture_reg_number(reg):
+    r = reg.split(".")[0]
+
+    if r[0] == "r":
+        return (True, int(r[1:]) - TEXTURE_BASE, 0)
+    else:
+        no = int(r[2:])
+        return (False, (no >> 1) - TEXTURE_BASE, no & 1)
+
+def decode_texture_reg(reg):
+    (full, select, upper) = decode_texture_reg_number(reg)
+
+    # Swizzle mandatory for texture registers, afaict
+    swizzle = reg.split(".")[1]
+    swizzleL = swizzle_component[swizzle[0]]
+    swizzleR = swizzle_component[swizzle[1]]
+
+    return (full, select, upper, swizzleR, swizzleL)
+
+def decode_texture_out_reg(reg):
+    (full, select, upper) = decode_texture_reg_number(reg)
+    (mask, _) = mask_from_parts(reg.split("."), False)
+
+    return (full, select, upper, mask)
+
+instruction_stream = []
+
+for p in program:
+    ins = p[0]
+    arguments = p[1]
+
+    family = ins_mod = ins.split(".")[0]
+    ins_op = (ins + ".").split(".")[1]
+
+    ins_outmod = (ins + "." + ".").split(".")[2]
+    
+    try:
+        out_mod = t6xx_outmod[ins_outmod]
+    except:
+        out_mod = 0
+
+    if ins in t6xx_load_store_ops:
+        op = t6xx_load_store_ops[ins]
+        (reg, mask, component, half, upper) = decode_masked_reg(p[1][0], False)
+        (immediate, address, swizzle, half, upper, modifiers) = decode_reg(p[1][1])
+        unknown = int(p[1][2], 16)
+        b = (op << 0) | (reg << 8) | (mask << 13) | (swizzle << 17) | (unknown << 25) | (address << 51)
+        instruction_stream += [(LDST, b)]
+    elif ins_op in t6xx_alu_ops:
+        op = t6xx_alu_ops[ins_op]
+
+        (reg_out, mask, out_component, half0, upper0) = decode_masked_reg(p[1][0], True)
+        (_, reg_in1, swizzle1, half1, upper1, mod1) = decode_reg(p[1][1])
+        (immediate, reg_in2, swizzle2, half2, upper2, mod2) = decode_reg(p[1][2])
+
+        if immediate:
+            register_word = (reg_in1 << 0) | ((reg_in2 >> 11) << 5) | (reg_out << 10) | (1 << 15)
+        else:
+            register_word = (reg_in1 << 0) | (reg_in2 << 5) | (reg_out << 10) 
+
+        if ins_mod in ["vadd", "vmul", "lut"]:
+            io_mode = t6xx_reg_mode["half" if half0 else "full"]
+            repsel = 0
+            i1half = half1
+            i2block = 0
+            output_override = 2 # NORMAL, TODO
+            wr_mask = 0
+
+            if (ins_outmod == "quarter"):
+                io_mode = t6xx_reg_mode["quarter"]
+
+            if half0:
+                # TODO: half actually
+                repsel = 2 * upper1 
+            else:
+                repsel = upper1
+
+            if half0:
+                # Rare case...
+
+                (_, halfmask, _, _, _) = decode_masked_reg(p[1][0], False)
+                wr_mask = halfmask
+            else:
+                wr_mask = mask
+
+
+            if immediate:
+                # Inline constant: lower 11 bits
+
+                i2block = ((reg_in2 & 0xFF) << 3) | ((reg_in2 >> 8) & 0x7)
+            else:
+                if half0:
+                    # TODO: replicate input 2 if half
+                    pass
+                else:
+                    # TODO: half selection
+                    i2block = upper2 | (half2 << 2)
+
+                i2block |= swizzle2 << 3
+
+            # Extra modifier for some special cased stuff
+            try:
+                special = ins.split(".")[3]
+
+                if special == "low":
+                    output_override = 0 # low
+                elif special == "fulllow":
+                    # TODO: Not really a special case, just a bug?
+                    io_mode = t6xx_reg_mode["full"]
+                    output_override = 0 #low
+                    wr_mask = 0xFF
+            except:
+                pass
+
+            instruction_word = (op << 0) | (io_mode << 8) | (mod1 << 10) | (repsel << 12) | (i1half << 14) | (swizzle1 << 15) | (mod2 << 23) | (i2block << 25) | (output_override << 36) | (out_mod << 38) | (wr_mask << 40)
+        elif ins_mod in ["sadd", "smul"]:
+            # TODO: What are these?
+            unknown2 = 0
+            unknown3 = 0
+
+            i1comp_block = 0
+
+            if half1:
+                i1comp_block = swizzle1 | (upper1 << 2)
+            else:
+                i1comp_block = swizzle1 << 1
+
+            i2block = 0
+
+            if immediate:
+                # Inline constant is splattered in a... bizarre way
+
+                i2block = (((reg_in2 >> 9) & 3) << 0) | (((reg_in2 >> 8) & 1) << 2) | (((reg_in2 >> 5) & 7) << 3) | (((reg_in2 >> 0) & 15) << 6)
+            else:
+                # TODO: half register
+                swizzle2 = (swizzle2 << 1) & 0x1F
+                i2block = (mod2 << 0) | ((not half2) << 2) | (swizzle2 << 3) | (unknown2 << 5)
+
+            outcomp_block = 0
+            
+            if True:
+                outcomp_block = out_component << 1
+            else:
+                # TODO: half register
+                pass
+
+            instruction_word = (op << 0) | (mod1 << 8) | ((not half1) << 10) | (i1comp_block << 11) | (i2block << 14) | (unknown3 << 25) | (out_mod << 26) | ((not half0) << 28) | (outcomp_block) << 29
+
+        else:
+            instruction_word = op
+
+        instruction_stream += [(ALU, ins_mod, register_word, instruction_word)]
+    elif family == "texture":
+        # Texture ops use long series of modifiers to describe their needed
+        # capabilities, seperated by dots. Decode them here
+        parts = ins.split(".")
+
+        # First few modifiers are fixed, like an instruction name
+        tex_op = parts[1]
+        tex_fmt = parts[2]
+
+        # The remaining are variable, but strictly ordered
+        parts = parts[3:]
+
+        op = texture_op[tex_op]
+
+        # Some bits are defined directly in the modifier list
+        shadow = "shadow" in parts
+        cont = "cont" in parts
+        last = "last" in parts
+        has_filter = "raw" not in parts
+
+        # The remaining need order preserved since they have their own arguments
+        argument_parts = [part for part in parts if part not in ["shadow", "cont", "last", "raw"]]
+
+        bias_lod = 0
+
+        for argument, part in zip(argument_parts, arguments[4:]):
+            if argument == "bias":
+                bias_lod = int(float(part) * 256)
+            else:
+                print("Unknown argument: " + str(argument))
+
+        fmt = texture_fmt[tex_fmt]
+        has_offset = 0
+
+        magic1 = 1 # IDEK
+        magic2 = 2 # Where did this even come from?!
+
+        texture_handle = int(arguments[1][len("texture"):])
+        
+        sampler_parts = arguments[2].split(".")
+        sampler_handle = int(sampler_parts[0][len("sampler"):])
+        swizzle0 = standard_swizzle_from_parts(sampler_parts)
+
+        (full0, select0, upper0, mask0) = decode_texture_out_reg(arguments[0])
+        (full1, select1, upper1, swizzleR1, swizzleL1) = decode_texture_reg(arguments[3])
+
+        tex = (op << 0) | (shadow << 6) | (cont << 8) | (last << 9) | (fmt << 10) | (has_offset << 15) | (has_filter << 16) | (select1 << 17) | (upper1 << 18) | (swizzleL1 << 19) | (swizzleR1 << 21) | (0 << 23) | (magic2 << 25) | (full0 << 29) | (magic1 << 30) | (select0 << 32) | (upper0 << 33) | (mask0 << 34) | (swizzle0 << 40) | (bias_lod << 72) | (texture_handle << 88) | (sampler_handle << 104)
+
+        instruction_stream += [(TEXTURE, tex)]
+    elif family == "br":
+        cond = ins.split(".")[2]
+        condition = branch_condition[cond]
+        bop = compact_branch_op[ins_op]
+
+        offset = int(arguments[0].split("->")[0])
+
+        # 2's complement and chill
+        if offset < 0:
+            offset = (1 << 7) - abs(offset)
+
+        # Find where we're going
+        dest_tag = int(arguments[0].split("->")[1])
+
+        br = (bop << 0) | (dest_tag << 3) | (offset << 7) | (condition << 14)
+
+        # TODO: Unconditional branch encoding
+
+        instruction_stream += [(ALU, "br", None, br)]
+    elif ins[1:] == "constants":
+        if ins[0] not in constant_types:
+            print("Unknown constant type " + str(constant_type))
+            break
+
+        (fmt, cast) = constant_types[ins[0]]
+
+        encoded = [struct.pack(fmt, cast(f)) for f in p[1]]
+
+        consts = bytearray()
+        for c in encoded:
+            consts += c
+
+        # consts must be exactly 4 quadwords, so pad with zeroes if necessary
+        consts += bytes(4*4 - len(consts))
+
+        instruction_stream += [(ALU, "constants", consts)]
+
+# Emit from instruction stream
+#
+# Walk instruction_stream and pack its entries into `instructions`, a list of
+# bytearrays holding one encoded bundle each. The low bits of a bundle's first
+# byte carry its own tag; the pass after this loop ORs the following bundle's
+# tag into the upper nibble of that byte.
+instructions = []
+index = 0
+while index < len(instruction_stream):
+    output_stream = bytearray()
+    ins = instruction_stream[index]
+    tag = ins[0]
+
+    can_prefetch = index + 1 < len(instruction_stream)
+    succeeding = None
+
+    if tag == LDST:
+        # A load/store bundle holds two words. Pair this entry with the next
+        # one if that is also a load/store; otherwise put ld_st_noop in the
+        # first slot and this entry in the second.
+        succeeding = instruction_stream[index + 1] if can_prefetch else None
+        parta = ins[1]
+        partb = None
+
+        if succeeding and succeeding[0] == LDST:
+            partb = succeeding[1]
+            # The successor has been consumed as well
+            index += 1
+        else:
+            partb = parta
+            parta = t6xx_load_store_ops["ld_st_noop"]
+
+        tag8 = t6xx_tag["load_store"]
+
+        ins = (partb << 68) | (parta << 8) | tag8
+        output_stream += (ins.to_bytes(16, "little"))
+    elif tag == TEXTURE:
+        tag8 = t6xx_tag["texture"] 
+        ins = (ins[1] << 8) | tag8
+
+        output_stream += (ins.to_bytes(16, "little"))
+    elif tag == ALU:
+        # TODO: Combining ALU ops
+
+        emit_size = 4 # 32-bit tag always emitted
+
+        # From here on `tag` is the 32-bit control word being built, not the
+        # unit enum read above
+        tag = 0
+        register_words = bytearray()
+        body_words = bytearray()
+        constant_words = None
+
+        last_alu_bit = 0
+
+        # Iterate through while there are ALU tags in strictly ascending order
+        while index < len(instruction_stream) and instruction_stream[index][0] == ALU and t6xx_alu_bits[instruction_stream[index][1]] > last_alu_bit:
+            ins = instruction_stream[index]
+
+            bit = t6xx_alu_bits[ins[1]]
+            last_alu_bit = bit
+
+            if ins[1] == "constants":
+                # Constants set no unit bit; they are appended after the
+                # padded bundle body below
+                constant_words = ins[2]
+            else:
+                # Flag for the used part of the GPU
+                tag |= 1 << bit
+
+                # 16-bit register word, if present
+                if ins[2] is not None:
+                    register_words += (ins[2].to_bytes(2, "little"))
+                    emit_size += 2
+
+                # Body size depends on the unit (see t6xx_alu_size_bits)
+                size = int(t6xx_alu_size_bits[ins[1]] / 8)
+                body_words += (ins[3].to_bytes(size, "little"))
+                emit_size += size
+
+            index += 1
+
+        index -= 1 # fix off by one, from later loop increment
+
+        # Pad to nearest multiple of 4 words
+        padding = (16 - (emit_size & 15)) if (emit_size & 15) else 0
+        emit_size += padding
+
+        # emit_size includes constants
+        if constant_words:
+            emit_size += len(constant_words)
+
+        # Calculate tag given size
+        words = emit_size >> 2
+        tag |= t6xx_tag["alu" + str(words)]
+
+        # Actually emit, now that we can
+        output_stream += tag.to_bytes(4, "little")
+        output_stream += register_words
+        output_stream += body_words
+        output_stream += bytes(padding)
+
+        if constant_words:
+            output_stream += constant_words
+
+    instructions += [output_stream]
+    index += 1
+
+# Assembly over; just emit tags at this point
+#
+# The upper nibble of each bundle's first byte receives the tag of the bundle
+# that follows it; the value 1 is used when there is nothing to advertise.
+binary = bytearray()
+
+for (idx, ins) in enumerate(instructions):
+    # Instruction prefetch
+    tag = 0
+
+    if idx + 1 < len(instructions):
+        # Low nibble of the next bundle's first byte is that bundle's own tag
+        tag = instructions[idx + 1][0] & 0xF
+
+        # Check for ALU special case
+        # (when the next bundle is both an ALU bundle and the last one, the
+        # value 1 is written instead of its tag)
+
+        if is_tag_alu(tag) and idx + 2 == len(instructions):
+            tag = 1
+    else:
+        # Instruction stream over
+        
+        tag = 1
+
+    ins[0] |= tag << 4
+
+    binary += ins
+
+# Dump the parsed program to stdout
+pprint.pprint(program)
+
+# The second command-line argument names the output binary
+with open(sys.argv[2], "wb") as f:
+    f.write(binary)
--- a/src/gallium/drivers/panfrost/midgard/cmdline.c
+++ b/src/gallium/drivers/panfrost/midgard/cmdline.c
@ -0,0 +1,145 @@
+/*
+ * Copyright (C) 2018 Alyssa Rosenzweig <alyssa@rosenzweig.io>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "compiler/glsl/standalone.h"
+#include "compiler/glsl/glsl_to_nir.h"
+#include "compiler/nir_types.h"
+#include "midgard_compile.h"
+#include "disassemble.h"
+#include "util/u_dynarray.h"
+#include "main/mtypes.h"
+
+bool c_do_mat_op_to_vec(struct exec_list *instructions);
+
+/* Write the contents of `data` to `filename`, then release the array.
+ *
+ * Failure to open the file, or a short write, is reported on stderr rather
+ * than handing a NULL stream to fwrite/fclose. The dynarray is finalised in
+ * every case, so callers must not use it afterwards. */
+
+static void
+finalise_to_disk(const char *filename, struct util_dynarray *data)
+{
+        FILE *fp = fopen(filename, "wb");
+
+        if (fp) {
+                if (fwrite(data->data, 1, data->size, fp) != data->size)
+                        fprintf(stderr, "Short write to %s\n", filename);
+
+                fclose(fp);
+        } else {
+                fprintf(stderr, "Couldn't open %s for writing\n", filename);
+        }
+
+        util_dynarray_fini(data);
+}
+
+/* Compile and link the GLSL sources named in argv, then compile the vertex
+ * and fragment stages for Midgard, writing the results to "vertex.bin" and
+ * "fragment.bin" (relative paths).
+ *
+ * NOTE(review): exactly two entries of argv are handed to the standalone
+ * compiler and the fragment linked shader is dereferenced unconditionally,
+ * so this presumably expects one vertex and one fragment source that link
+ * successfully -- confirm. */
+
+static void
+compile_shader(char **argv)
+{
+        struct gl_shader_program *prog;
+        nir_shader *nir;
+
+        struct standalone_options options = {
+                .glsl_version = 140,
+                .do_link = true,
+        };
+
+        prog = standalone_compile_shader(&options, 2, argv);
+        prog->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program->info.stage = MESA_SHADER_FRAGMENT;
+
+        /* Run the matrix-op-to-vector pass (via the C++ shim) on the IR of
+         * every stage that was linked */
+        for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i) {
+                if (prog->_LinkedShaders[i] == NULL)
+                        continue;
+
+                c_do_mat_op_to_vec(prog->_LinkedShaders[i]->ir);
+        }
+
+        /* The last argument is false here and true in compile_blend;
+         * presumably it selects blend-shader compilation -- confirm against
+         * midgard_compile.h */
+        midgard_program compiled;
+        nir = glsl_to_nir(prog, MESA_SHADER_VERTEX, &midgard_nir_options);
+        midgard_compile_shader_nir(nir, &compiled, false);
+        finalise_to_disk("vertex.bin", &compiled.compiled);
+
+        nir = glsl_to_nir(prog, MESA_SHADER_FRAGMENT, &midgard_nir_options);
+        midgard_compile_shader_nir(nir, &compiled, false);
+        finalise_to_disk("fragment.bin", &compiled.compiled);
+}
+
+/* Compile the single GLSL source named in argv as a fragment-stage program
+ * with the blend flag set, writing the result to "blend.bin" (relative
+ * path). Unlike compile_shader, do_link is not set in the options and the
+ * matrix-op lowering loop is compiled out. */
+
+static void
+compile_blend(char **argv)
+{
+        struct gl_shader_program *prog;
+        nir_shader *nir;
+
+        struct standalone_options options = {
+                .glsl_version = 140,
+        };
+
+        prog = standalone_compile_shader(&options, 1, argv);
+        prog->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program->info.stage = MESA_SHADER_FRAGMENT;
+
+        /* Disabled: the same per-stage lowering loop that compile_shader runs */
+#if 0
+
+        for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i) {
+                if (prog->_LinkedShaders[i] == NULL)
+                        continue;
+
+                c_do_mat_op_to_vec(prog->_LinkedShaders[i]->ir);
+        }
+
+#endif
+
+        /* Last argument is true here (false in compile_shader); presumably
+         * the is-blend flag -- confirm against midgard_compile.h */
+        midgard_program program;
+        nir = glsl_to_nir(prog, MESA_SHADER_FRAGMENT, &midgard_nir_options);
+        midgard_compile_shader_nir(nir, &program, true);
+        finalise_to_disk("blend.bin", &program.compiled);
+}
+
+/* Read the whole of `filename` into memory and print its disassembly.
+ *
+ * Any I/O or allocation failure is reported on stderr and terminates the
+ * tool with status 1, instead of relying on an assert (which disappears
+ * under NDEBUG) or passing an invalid size or short buffer onwards. */
+
+static void
+disassemble(const char *filename)
+{
+        FILE *fp = fopen(filename, "rb");
+
+        if (!fp) {
+                fprintf(stderr, "Couldn't open %s\n", filename);
+                exit(1);
+        }
+
+        /* ftell reports -1 on failure, so keep its native signed type */
+        fseek(fp, 0, SEEK_END);
+        long filesize = ftell(fp);
+        rewind(fp);
+
+        if (filesize < 0) {
+                fprintf(stderr, "Couldn't determine the size of %s\n", filename);
+                fclose(fp);
+                exit(1);
+        }
+
+        /* Request at least one byte so that an empty file does not make a
+         * successful malloc(0) look like a failure */
+        unsigned char *code = malloc(filesize ? (size_t) filesize : 1);
+
+        if (!code) {
+                fprintf(stderr, "Out of memory reading %s\n", filename);
+                fclose(fp);
+                exit(1);
+        }
+
+        size_t got = fread(code, 1, (size_t) filesize, fp);
+        fclose(fp);
+
+        if (got != (size_t) filesize) {
+                fprintf(stderr, "Short read from %s\n", filename);
+                free(code);
+                exit(1);
+        }
+
+        disassemble_midgard(code, filesize);
+        free(code);
+}
+
+/* Print the command summary to stderr */
+
+static void
+print_usage(void)
+{
+        fprintf(stderr, "Usage: midgard_compiler command [args]\n");
+        fprintf(stderr, "midgard_compiler compile program.vert program.frag\n");
+        fprintf(stderr, "midgard_compiler blend program.blend\n");
+        fprintf(stderr, "midgard_compiler disasm binary.bin\n");
+}
+
+/* Entry point: dispatch on the command in argv[1].
+ *
+ * Each command's file operands are checked before they are used, so a
+ * missing operand prints the usage text and exits with status 1 rather than
+ * dereferencing a NULL argv entry. Returns 0 on success. */
+
+int
+main(int argc, char **argv)
+{
+        if (argc < 2) {
+                print_usage();
+                exit(1);
+        }
+
+        const char *command = argv[1];
+
+        /* Number of operands following the command name */
+        int operands = argc - 2;
+
+        if (strcmp(command, "compile") == 0) {
+                /* compile_shader consumes two source files */
+                if (operands < 2) {
+                        print_usage();
+                        exit(1);
+                }
+
+                compile_shader(&argv[2]);
+        } else if (strcmp(command, "blend") == 0) {
+                if (operands < 1) {
+                        print_usage();
+                        exit(1);
+                }
+
+                compile_blend(&argv[2]);
+        } else if (strcmp(command, "disasm") == 0) {
+                if (operands < 1) {
+                        print_usage();
+                        exit(1);
+                }
+
+                disassemble(argv[2]);
+        } else {
+                fprintf(stderr, "Unknown command\n");
+                exit(1);
+        }
+
+        return 0;
+}
--- a/src/gallium/drivers/panfrost/midgard/cppwrap.cpp
+++ b/src/gallium/drivers/panfrost/midgard/cppwrap.cpp
@ -0,0 +1,9 @@
+struct exec_list;
+
+bool do_mat_op_to_vec(struct exec_list *instructions);
+
+/* C-linkage shim so that C sources can call the C++ do_mat_op_to_vec() */
+extern "C" bool
+c_do_mat_op_to_vec(struct exec_list *instructions)
+{
+	return do_mat_op_to_vec(instructions);
+}
--- a/src/gallium/drivers/panfrost/midgard/disassemble.c
+++ b/src/gallium/drivers/panfrost/midgard/disassemble.c
@ -0,0 +1,986 @@
+/* Author(s):
+ *   Connor Abbott
+ *   Alyssa Rosenzweig
+ *
+ * Copyright (c) 2013 Connor Abbott (connor@abbott.cx)
+ * Copyright (c) 2018 Alyssa Rosenzweig (alyssa@rosenzweig.io)
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <assert.h>
+#include <inttypes.h>
+#include <string.h>
+#include "midgard.h"
+#include "midgard-parse.h"
+#include "disassemble.h"
+#include "util/half_float.h"
+
+/* Expands to a `case` label that prints the string `str` and breaks out of
+ * the enclosing switch. Used by the texture format/op printers and #undef'd
+ * right after them. */
+#define DEFINE_CASE(define, str) case define: { printf(str); break; }
+
+/* Set by print_alu_opcode: true when the last ALU mnemonic printed started
+ * with 'i'. Read by print_reg and print_immediate to decide how to show
+ * constants. */
+static bool is_instruction_int = false;
+
+/* Print the mnemonic for an ALU opcode, falling back to a hex placeholder
+ * when the name table has no entry. Also records, for later constant
+ * analysis, whether the mnemonic begins with 'i'. */
+
+static void
+print_alu_opcode(midgard_alu_op op)
+{
+        const char *mnemonic = alu_opcode_names[op];
+
+        if (!mnemonic) {
+                printf("alu_op_%02X", op);
+
+                /* Unnamed opcodes are never treated as integer ops */
+                is_instruction_int = false;
+                return;
+        }
+
+        printf("%s", mnemonic);
+
+        /* For constant analysis */
+        is_instruction_int = (mnemonic[0] == 'i');
+}
+
+/* Print the mnemonic for a load/store opcode, or a hex placeholder when the
+ * name table has no entry for it. */
+
+static void
+print_ld_st_opcode(midgard_load_store_op op)
+{
+        const char *mnemonic = load_store_opcode_names[op];
+
+        if (!mnemonic) {
+                printf("ldst_op_%02X", op);
+                return;
+        }
+
+        printf("%s", mnemonic);
+}
+
+/* Per-bundle analysis state, updated by print_reg and consumed when the
+ * trailing constants of an ALU bundle are printed. disassemble_midgard
+ * resets both after each ALU bundle. */
+static bool is_embedded_constant_half = false;
+static bool is_embedded_constant_int = false;
+
+/* Print a register name: "rN" for a full register, "hrN" for a half
+ * register. As a side effect, note when register 26 (or one of its two
+ * halves) is referenced, so the embedded constants can later be printed
+ * with a matching width and int/float interpretation. */
+
+static void
+print_reg(unsigned reg, bool half)
+{
+        /* A half register index maps onto full register index / 2 */
+        unsigned full_index = half ? (reg >> 1) : reg;
+
+        if (full_index == 26) {
+                if (half)
+                        is_embedded_constant_half = true;
+
+                is_embedded_constant_int = is_instruction_int;
+        }
+
+        printf("%sr%u", half ? "h" : "", reg);
+}
+
+/* Suffix printed after the opcode for each output modifier value. Entries 0
+ * and 2 print nothing. The table only holds string literals, so both the
+ * pointers and the characters are const. */
+static const char * const outmod_names[4] = {
+        "",
+        ".pos",
+        "",
+        ".sat"
+};
+
+/* Print the output-modifier suffix (possibly empty) for an ALU instruction.
+ * outmod indexes the four-entry table above directly. */
+
+static void
+print_outmod(midgard_outmod outmod)
+{
+        printf("%s", outmod_names[outmod]);
+}
+
+/* Dump one quad word (four 32-bit words) as comma-separated hex on a single
+ * line. `tabs` is accepted for signature parity with the other printers and
+ * is not used. */
+
+static void
+print_quad_word(uint32_t *words, unsigned tabs)
+{
+        printf("0x%08X, 0x%08X, 0x%08X, 0x%08X \n",
+               words[0], words[1], words[2], words[3]);
+}
+
+/* Print one source operand of a vector ALU instruction.
+ *
+ * src_binary: raw source descriptor bits, reinterpreted below as a
+ *             midgard_vector_alu_src.
+ * out_high:   whether the instruction writes the upper half; selects which
+ *             half register is named when out_half is set.
+ * out_half:   whether the operand is printed as a half register.
+ * reg:        full-register index of the source.
+ *
+ * Output has the form [-][abs(]<reg>[.swizzle][)], with inline comments
+ * flagging rep_low/rep_high/half bits that are not folded into the
+ * register name. */
+
+static void
+print_vector_src(unsigned src_binary, bool out_high,
+                 bool out_half, unsigned reg)
+{
+        midgard_vector_alu_src *src = (midgard_vector_alu_src *)&src_binary;
+
+        if (src->negate)
+                printf("-");
+
+        if (src->abs)
+                printf("abs(");
+
+        //register
+
+        if (out_half) {
+                if (src->half)
+                        printf(" /* half */ ");
+
+                unsigned half_reg;
+
+                /* Default to the half matching the output half; the
+                 * "opposite" rep bit flips the choice, while the matching
+                 * rep bit is only reported as a comment */
+                if (out_high) {
+                        if (src->rep_low)
+                                half_reg = reg * 2;
+                        else
+                                half_reg = reg * 2 + 1;
+
+                        if (src->rep_high)
+                                printf(" /* rep_high */ ");
+                } else {
+                        if (src->rep_high)
+                                half_reg = reg * 2 + 1;
+                        else
+                                half_reg = reg * 2;
+
+                        if (src->rep_low)
+                                printf(" /* rep_low */ ");
+                }
+
+                print_reg(half_reg, true);
+        } else {
+                if (src->rep_high)
+                        printf(" /* rep_high */ ");
+
+                /* With the half bit set, rep_low picks which half register
+                 * is named; otherwise it is only reported as a comment */
+                if (src->half)
+                        print_reg(reg * 2 + src->rep_low, true);
+                else {
+                        if (src->rep_low)
+                                printf(" /* rep_low */ ");
+
+                        print_reg(reg, false);
+                }
+        }
+
+        //swizzle
+
+        /* 0xE4 decodes to .xyzw with the loop below, so it is omitted */
+        if (src->swizzle != 0xE4) { //default swizzle
+                unsigned i;
+                static const char c[4] = "xyzw";
+
+                printf(".");
+
+                /* Two bits per output channel, lowest bits first */
+                for (i = 0; i < 4; i++)
+                        printf("%c", c[(src->swizzle >> (i * 2)) & 3]);
+        }
+
+        if (src->abs)
+                printf(")");
+}
+
+/* Reassemble the 16-bit inline immediate of a vector ALU instruction.
+ *
+ * src2_reg supplies the bits from bit 11 upward, the low three bits of imm
+ * supply bits 8-10, and bits 3-10 of imm supply the low byte. The result is
+ * truncated to 16 bits. */
+
+static uint16_t
+decode_vector_imm(unsigned src2_reg, unsigned imm)
+{
+        unsigned top = src2_reg << 11;
+        unsigned middle = (imm & 0x7) << 8;
+        unsigned bottom = (imm >> 3) & 0xFF;
+
+        return (uint16_t) (top | middle | bottom);
+}
+
+/* Print a 16-bit inline immediate prefixed with '#': as a decimal integer
+ * when the current instruction was flagged as an integer op, otherwise
+ * converted from half-float. */
+
+static void
+print_immediate(uint16_t imm)
+{
+        if (!is_instruction_int) {
+                printf("#%g", _mesa_half_to_float(imm));
+                return;
+        }
+
+        printf("#%d", imm);
+}
+
+/* Print one vector ALU field as "<unit>.<op><outmod> <dest>, <src1>, <src2>".
+ *
+ * name:     unit prefix chosen by the caller (e.g. "vmul", "vadd", "lut").
+ * words:    the field body, reinterpreted as a midgard_vector_alu.
+ * reg_word: the 16-bit register word for this field, reinterpreted as a
+ *           midgard_reg_info.
+ * tabs:     unused here.
+ *
+ * Side effects: print_alu_opcode and print_reg update the file-level
+ * constant-analysis flags, so the opcode must be printed before any
+ * register or immediate. */
+
+static void
+print_vector_field(const char *name, uint16_t *words, uint16_t reg_word,
+                   unsigned tabs)
+{
+        midgard_reg_info *reg_info = (midgard_reg_info *)&reg_word;
+        midgard_vector_alu *alu_field = (midgard_vector_alu *) words;
+
+        /* Only diagnosed; decoding continues as if the mode were full */
+        if (alu_field->reg_mode != midgard_reg_mode_half &&
+                        alu_field->reg_mode != midgard_reg_mode_full) {
+                printf("unknown reg mode %u\n", alu_field->reg_mode);
+        }
+
+        /* For now, prefix instruction names with their unit, until we
+         * understand how this works on a deeper level */
+        printf("%s.", name);
+
+        print_alu_opcode(alu_field->op);
+        print_outmod(alu_field->outmod);
+        printf(" ");
+
+        bool half, out_half, out_high = false;
+        unsigned mask;
+
+        half = (alu_field->reg_mode == midgard_reg_mode_half);
+
+        if (half) {
+                /* Half mode: one mask bit per channel. A non-zero low
+                 * nibble selects the lower half; otherwise the high nibble
+                 * is used and the upper half is written */
+                if (alu_field->mask & 0xF) {
+                        out_high = false;
+
+                        /* Both nibbles set: show the raw mask as a comment */
+                        if ((alu_field->mask & 0xF0))
+                                printf("/* %X */ ", alu_field->mask);
+
+                        mask = alu_field->mask;
+                } else {
+                        out_high = true;
+                        mask = alu_field->mask >> 4;
+                }
+        } else {
+                /* Full mode: only the even mask bits (0, 2, 4, 6) are
+                 * consulted, compacted into a 4-bit per-channel mask */
+                mask = alu_field->mask & 1;
+                mask |= (alu_field->mask & 4) >> 1;
+                mask |= (alu_field->mask & 16) >> 2;
+                mask |= (alu_field->mask & 64) >> 3;
+        }
+
+        out_half = half;
+
+        /* A destination override forces a half-register destination and
+         * picks which half, regardless of the mask-derived choice above */
+        if (alu_field->dest_override != midgard_dest_override_none) {
+                if (out_half)
+                        printf("/* half */ ");
+
+                out_half = true;
+
+                if (alu_field->dest_override == midgard_dest_override_lower)
+                        out_high = false;
+                else if (alu_field->dest_override == midgard_dest_override_upper)
+                        out_high = true;
+                else
+                        assert(0);
+        }
+
+        if (out_half) {
+                if (out_high)
+                        print_reg(2 * reg_info->out_reg + 1, true);
+                else
+                        print_reg(2 * reg_info->out_reg, true);
+        } else
+                print_reg(reg_info->out_reg, false);
+
+        /* A full .xyzw write mask is left implicit */
+        if (mask != 0xF) {
+                unsigned i;
+                static const char c[4] = "xyzw";
+
+                printf(".");
+
+                for (i = 0; i < 4; i++)
+                        if (mask & (1 << i))
+                                printf("%c", c[i]);
+        }
+
+        printf(", ");
+
+        /* NOTE(review): the sources are printed using `half` (the register
+         * mode), not `out_half` (which also reflects dest_override) --
+         * confirm this is intended */
+        print_vector_src(alu_field->src1, out_high, half, reg_info->src1_reg);
+
+        printf(", ");
+
+        /* With src2_imm set, the second source is an inline immediate built
+         * from the src2 register bits and the src2 descriptor shifted
+         * right by two */
+        if (reg_info->src2_imm) {
+                uint16_t imm = decode_vector_imm(reg_info->src2_reg, alu_field->src2 >> 2);
+                print_immediate(imm);
+        } else {
+                print_vector_src(alu_field->src2, out_high, half,
+                                 reg_info->src2_reg);
+        }
+
+        printf("\n");
+}
+
+/* Print one source operand of a scalar ALU instruction as
+ * [-][abs(]<reg>.<component>[)].
+ *
+ * src_binary: raw source descriptor bits, reinterpreted below as a
+ *             midgard_scalar_alu_src.
+ * reg:        full-register index of the source.
+ *
+ * For a full source, the channel is component >> 1. For a half source,
+ * bit 2 of component picks the half register and the low two bits pick
+ * the channel. */
+
+static void
+print_scalar_src(unsigned src_binary, unsigned reg)
+{
+        midgard_scalar_alu_src *src = (midgard_scalar_alu_src *)&src_binary;
+
+        if (src->negate)
+                printf("-");
+
+        if (src->abs)
+                printf("abs(");
+
+        if (src->full)
+                print_reg(reg, false);
+        else
+                print_reg(reg * 2 + (src->component >> 2), true);
+
+        static const char c[4] = "xyzw";
+
+        printf(".%c", c[src->full ? src->component >> 1 : src->component & 3]);
+
+        if (src->abs)
+                printf(")");
+}
+
+/* Reassemble the 16-bit inline immediate of a scalar ALU instruction.
+ *
+ * The immediate's bits are scattered: src2_reg supplies bit 11 upward,
+ * imm[1:0] land at bits 9-10, imm[2] at bit 8, imm[5:3] at bits 5-7 and
+ * imm >> 6 supplies the lowest bits. The result is truncated to 16 bits. */
+
+static uint16_t
+decode_scalar_imm(unsigned src2_reg, unsigned imm)
+{
+        unsigned bits = src2_reg << 11;
+
+        bits |= (imm & 3) << 9;
+        bits |= (imm & 4) << 6;
+        bits |= (imm & 0x38) << 2;
+        bits |= imm >> 6;
+
+        return (uint16_t) bits;
+}
+
+/* Print one scalar ALU field as
+ * "<unit>.<op><outmod> <dest>.<c>, <src1>, <src2>".
+ *
+ * name:     unit prefix chosen by the caller (e.g. "sadd", "smul").
+ * words:    the field body, reinterpreted as a midgard_scalar_alu.
+ * reg_word: the 16-bit register word for this field, reinterpreted as a
+ *           midgard_reg_info.
+ * tabs:     unused here.
+ *
+ * As in the vector printer, the opcode is printed first because it sets
+ * the int/float flag later consulted by print_reg and print_immediate. */
+
+static void
+print_scalar_field(const char *name, uint16_t *words, uint16_t reg_word,
+                   unsigned tabs)
+{
+        midgard_reg_info *reg_info = (midgard_reg_info *)&reg_word;
+        midgard_scalar_alu *alu_field = (midgard_scalar_alu *) words;
+
+        /* Diagnostic only; decoding continues */
+        if (alu_field->unknown)
+                printf("scalar ALU unknown bit set\n");
+
+        printf("%s.", name);
+        print_alu_opcode(alu_field->op);
+        print_outmod(alu_field->outmod);
+        printf(" ");
+
+        /* Half destinations: bit 2 of output_component picks the half
+         * register, the low two bits pick the channel (see below) */
+        if (alu_field->output_full)
+                print_reg(reg_info->out_reg, false);
+        else
+                print_reg(reg_info->out_reg * 2 + (alu_field->output_component >> 2),
+                          true);
+
+        static const char c[4] = "xyzw";
+        printf(".%c, ",
+               c[alu_field->output_full ? alu_field->output_component >> 1 :
+                                        alu_field->output_component & 3]);
+
+        print_scalar_src(alu_field->src1, reg_info->src1_reg);
+
+        printf(", ");
+
+        /* With src2_imm set, the second source is an inline immediate built
+         * from the src2 register bits and the src2 descriptor */
+        if (reg_info->src2_imm) {
+                uint16_t imm = decode_scalar_imm(reg_info->src2_reg,
+                                                 alu_field->src2);
+                print_immediate(imm);
+        } else
+                print_scalar_src(alu_field->src2, reg_info->src2_reg);
+
+        printf("\n");
+}
+
+/* Print the branch/writeout operation as a dotted prefix ("cond.",
+ * "write." or "discard."); any other value is printed as "unk<op>.". */
+
+static void
+print_branch_op(int op)
+{
+        const char *label = NULL;
+
+        if (op == midgard_jmp_writeout_op_branch_cond)
+                label = "cond.";
+        else if (op == midgard_jmp_writeout_op_writeout)
+                label = "write.";
+        else if (op == midgard_jmp_writeout_op_discard)
+                label = "discard.";
+
+        if (label)
+                printf("%s", label);
+        else
+                printf("unk%d.", op);
+}
+
+/* Print the name of a branch condition ("write0", "false", "true" or
+ * "always"). Unrecognised values print nothing. */
+
+static void
+print_branch_cond(int cond)
+{
+        const char *name = "";
+
+        if (cond == midgard_condition_write0)
+                name = "write0";
+        else if (cond == midgard_condition_false)
+                name = "false";
+        else if (cond == midgard_condition_true)
+                name = "true";
+        else if (cond == midgard_condition_always)
+                name = "always";
+
+        printf("%s", name);
+}
+
+/* Print a compact (single 16-bit word) branch/writeout field.
+ *
+ * The low three bits of the word select the operation. Unconditional
+ * branches are decoded as midgard_branch_uncond; every other operation,
+ * including unrecognised ones, is decoded as midgard_branch_cond. Output is
+ * "br.<kind> <signed offset> -> <dest tag in hex>". */
+
+static void
+print_compact_branch_writeout_field(uint16_t word)
+{
+        midgard_jmp_writeout_op op = word & 0x7;
+
+        switch (op) {
+        case midgard_jmp_writeout_op_branch_uncond: {
+                midgard_branch_uncond br_uncond;
+                /* NOTE(review): copies sizeof(br_uncond) bytes out of a
+                 * 16-bit word -- assumes the struct is no larger than
+                 * that; confirm against its definition */
+                memcpy((char *) &br_uncond, (char *) &word, sizeof(br_uncond));
+                printf("br.uncond ");
+
+                /* Only mention the unknown field when it deviates from 1 */
+                if (br_uncond.unknown != 1)
+                        printf("unknown:%d, ", br_uncond.unknown);
+
+                /* Make the sign explicit for non-negative offsets */
+                if (br_uncond.offset >= 0)
+                        printf("+");
+
+                printf("%d", br_uncond.offset);
+
+                printf(" -> %X\n", br_uncond.dest_tag);
+                break;
+        }
+
+        /* All remaining ops share the conditional-branch layout */
+        case midgard_jmp_writeout_op_branch_cond:
+        case midgard_jmp_writeout_op_writeout:
+        case midgard_jmp_writeout_op_discard:
+        default: {
+                midgard_branch_cond br_cond;
+                /* NOTE(review): same size assumption as above */
+                memcpy((char *) &br_cond, (char *) &word, sizeof(br_cond));
+
+                printf("br.");
+
+                print_branch_op(br_cond.op);
+                print_branch_cond(br_cond.cond);
+
+                printf(" ");
+
+                if (br_cond.offset >= 0)
+                        printf("+");
+
+                printf("%d", br_cond.offset);
+
+                printf(" -> %X\n", br_cond.dest_tag);
+                break;
+        }
+        }
+}
+
+/* Print an extended branch/writeout field.
+ *
+ * words: start of the field; sizeof(midgard_branch_extended) bytes are
+ *        copied out of it (the caller advances by three 16-bit words).
+ *
+ * Output is "br.<op><cond> <signed offset> -> <dest tag in hex>", with the
+ * `unknown` and `zero` fields reported when non-zero. */
+
+static void
+print_extended_branch_writeout_field(uint8_t *words)
+{
+        midgard_branch_extended br;
+        memcpy((char *) &br, (char *) words, sizeof(br));
+
+        printf("br.");
+
+        print_branch_op(br.op);
+        print_branch_cond(br.cond);
+
+        /* XXX: This can't be right */
+        /* Note that each diagnostic ends in a newline, so when one fires
+         * the offset and destination end up on the following line */
+        if (br.unknown)
+                printf(".unknown%d\n", br.unknown);
+
+        if (br.zero)
+                printf(".zero%d\n", br.zero);
+
+        printf(" ");
+
+        /* Make the sign explicit for non-negative offsets */
+        if (br.offset >= 0)
+                printf("+");
+
+        printf("%d", br.offset);
+
+        printf(" -> %X\n", br.dest_tag);
+}
+
+/* Count how many of the five ALU unit enable bits (17, 19, 21, 23 and 25)
+ * are set in an ALU control word. The branch bits (26, 27) are not
+ * counted. */
+
+static unsigned
+num_alu_fields_enabled(uint32_t control_word)
+{
+        static const unsigned enable_bits[] = { 17, 19, 21, 23, 25 };
+        unsigned count = 0;
+        unsigned k;
+
+        for (k = 0; k < sizeof(enable_bits) / sizeof(enable_bits[0]); k++)
+                count += (control_word >> enable_bits[k]) & 1;
+
+        return count;
+}
+
+/* Reinterpret the bits of a 32-bit integer as a float, without any numeric
+ * conversion. */
+
+static float
+float_bitcast(uint32_t integer)
+{
+        float result;
+
+        memcpy(&result, &integer, sizeof(result));
+
+        return result;
+}
+
+/* Print every field of one ALU bundle.
+ *
+ * words:          first 32-bit word of the bundle (the control word).
+ * num_quad_words: bundle length in 128-bit quad words, as looked up by the
+ *                 caller.
+ * tabs:           forwarded to the field printers.
+ *
+ * Layout as decoded here: the control word is followed by one 16-bit
+ * register word per enabled ALU unit, then by the field bodies in unit
+ * order (vmul, sadd, vadd, smul, lut), then by optional compact/extended
+ * branch fields. If the bundle is one quad word longer than the fields
+ * require, the last quad word is printed as embedded constants. */
+
+static void
+print_alu_word(uint32_t *words, unsigned num_quad_words,
+               unsigned tabs)
+{
+        uint32_t control_word = words[0];
+        /* beginning_ptr walks the register words; word_ptr walks the field
+         * bodies, which start right after all register words */
+        uint16_t *beginning_ptr = (uint16_t *)(words + 1);
+        unsigned num_fields = num_alu_fields_enabled(control_word);
+        uint16_t *word_ptr = beginning_ptr + num_fields;
+        /* Running size in 16-bit units: two for the control word plus one
+         * register word per enabled field */
+        unsigned num_words = 2 + num_fields;
+
+        if ((control_word >> 16) & 1)
+                printf("unknown bit 16 enabled\n");
+
+        /* Vector fields occupy three 16-bit words, scalar fields two */
+        if ((control_word >> 17) & 1) {
+                print_vector_field("vmul", word_ptr, *beginning_ptr, tabs);
+                beginning_ptr += 1;
+                word_ptr += 3;
+                num_words += 3;
+        }
+
+        if ((control_word >> 18) & 1)
+                printf("unknown bit 18 enabled\n");
+
+        if ((control_word >> 19) & 1) {
+                print_scalar_field("sadd", word_ptr, *beginning_ptr, tabs);
+                beginning_ptr += 1;
+                word_ptr += 2;
+                num_words += 2;
+        }
+
+        if ((control_word >> 20) & 1)
+                printf("unknown bit 20 enabled\n");
+
+        if ((control_word >> 21) & 1) {
+                print_vector_field("vadd", word_ptr, *beginning_ptr, tabs);
+                beginning_ptr += 1;
+                word_ptr += 3;
+                num_words += 3;
+        }
+
+        if ((control_word >> 22) & 1)
+                printf("unknown bit 22 enabled\n");
+
+        if ((control_word >> 23) & 1) {
+                print_scalar_field("smul", word_ptr, *beginning_ptr, tabs);
+                beginning_ptr += 1;
+                word_ptr += 2;
+                num_words += 2;
+        }
+
+        if ((control_word >> 24) & 1)
+                printf("unknown bit 24 enabled\n");
+
+        if ((control_word >> 25) & 1) {
+                print_vector_field("lut", word_ptr, *beginning_ptr, tabs);
+                beginning_ptr += 1;
+                word_ptr += 3;
+                num_words += 3;
+        }
+
+        /* Branch fields have no register word: compact is one 16-bit word,
+         * extended is three */
+        if ((control_word >> 26) & 1) {
+                print_compact_branch_writeout_field(*word_ptr);
+                word_ptr += 1;
+                num_words += 1;
+        }
+
+        if ((control_word >> 27) & 1) {
+                print_extended_branch_writeout_field((uint8_t *) word_ptr);
+                word_ptr += 3;
+                num_words += 3;
+        }
+
+        /* (num_words + 7) / 8 is the number of quad words the decoded
+         * fields need; anything beyond that must be exactly one extra */
+        if (num_quad_words > (num_words + 7) / 8) {
+                assert(num_quad_words == (num_words + 15) / 8);
+                //Assume that the extra quadword is constants
+                void *consts = words + (4 * num_quad_words - 4);
+
+                /* Interpretation comes from the flags print_reg set while
+                 * the fields above were printed. Only the first four values
+                 * are printed in every case, including the 16-bit ones */
+                if (is_embedded_constant_int) {
+                        if (is_embedded_constant_half) {
+                                int16_t *sconsts = (int16_t *) consts;
+                                printf("sconstants %d, %d, %d, %d\n",
+                                       sconsts[0],
+                                       sconsts[1],
+                                       sconsts[2],
+                                       sconsts[3]);
+                        } else {
+                                int32_t *iconsts = (int32_t *) consts;
+                                printf("iconstants %d, %d, %d, %d\n",
+                                       iconsts[0],
+                                       iconsts[1],
+                                       iconsts[2],
+                                       iconsts[3]);
+                        }
+                } else {
+                        if (is_embedded_constant_half) {
+                                uint16_t *hconsts = (uint16_t *) consts;
+                                printf("hconstants %g, %g, %g, %g\n",
+                                       _mesa_half_to_float(hconsts[0]),
+                                       _mesa_half_to_float(hconsts[1]),
+                                       _mesa_half_to_float(hconsts[2]),
+                                       _mesa_half_to_float(hconsts[3]));
+                        } else {
+                                uint32_t *fconsts = (uint32_t *) consts;
+                                printf("fconstants %g, %g, %g, %g\n",
+                                       float_bitcast(fconsts[0]),
+                                       float_bitcast(fconsts[1]),
+                                       float_bitcast(fconsts[2]),
+                                       float_bitcast(fconsts[3]));
+                        }
+
+                }
+        }
+}
+
+/* Swizzle/mask formats are common between load/store ops and texture ops, it
+ * looks like... */
+
+/* Print a four-channel swizzle as ".abcd", two bits per channel with the
+ * lowest bits first. The identity swizzle 0xE4 (.xyzw) prints nothing. */
+
+static void
+print_swizzle(uint32_t swizzle)
+{
+        char text[6];
+        unsigned chan;
+
+        if (swizzle == 0xE4)
+                return;
+
+        text[0] = '.';
+
+        for (chan = 0; chan < 4; chan++)
+                text[1 + chan] = "xyzw"[(swizzle >> (2 * chan)) & 3];
+
+        text[5] = '\0';
+
+        printf("%s", text);
+}
+
+/* Print a four-channel write mask as a dot followed by the enabled channel
+ * letters. A full mask (0xF) prints nothing and an empty mask prints
+ * ".0". */
+
+static void
+print_mask(uint32_t mask)
+{
+        unsigned bit;
+
+        /* All four channels enabled: leave the mask implicit */
+        if (mask == 0xF)
+                return;
+
+        printf(".");
+
+        /* Handle degenerate case */
+        if (mask == 0) {
+                printf("0");
+                return;
+        }
+
+        for (bit = 0; bit < 4; bit++) {
+                if ((mask >> bit) & 1)
+                        printf("%c", "xyzw"[bit]);
+        }
+}
+
+/* Print the qualifier suffixes (".flat", ".centroid", ".interpN") encoded
+ * in the varying_parameters field of a load/store word, plus inline
+ * comments for inconsistent or unexpected bits. */
+
+static void
+print_varying_parameters(midgard_load_store_word *word)
+{
+        midgard_varying_parameter param;
+        /* Copy the bitfield into a plain integer first, then reinterpret
+         * it as the parameter struct */
+        unsigned v = word->varying_parameters;
+        /* NOTE(review): assumes sizeof(param) <= sizeof(unsigned) --
+         * confirm against the struct definition */
+        memcpy(&param, &v, sizeof(param));
+
+        if (param.is_varying) {
+                /* If a varying, there are qualifiers */
+                if (param.flat)
+                        printf(".flat");
+
+                /* The default interpolation mode is left implicit */
+                if (param.interpolation != midgard_interp_default) {
+                        if (param.interpolation == midgard_interp_centroid)
+                                printf(".centroid");
+                        else
+                                printf(".interp%d", param.interpolation);
+                }
+        } else if (param.flat || param.interpolation) {
+                printf(" /* is_varying not set but varying metadata attached */");
+        }
+
+        /* Fields named zero* are reported whenever they are non-zero */
+        if (param.zero1 || param.zero2)
+                printf(" /* zero tripped, %d %d */ ", param.zero1, param.zero2);
+}
+
+/* Return true for the four varying load/store opcodes (16- and 32-bit
+ * loads and stores); false for every other opcode. */
+
+static bool
+is_op_varying(unsigned op)
+{
+        return op == midgard_op_store_vary_16 ||
+               op == midgard_op_store_vary_32 ||
+               op == midgard_op_load_vary_16 ||
+               op == midgard_op_load_vary_32;
+}
+
+/* Print a single load/store instruction as
+ * "<op>[qualifiers] r<reg>[.mask], <address>[.swizzle], 0x<unknown>".
+ *
+ * data: the 64-bit instruction, reinterpreted below as a
+ *       midgard_load_store_word.
+ * tabs: unused here. */
+
+static void
+print_load_store_instr(uint64_t data,
+                       unsigned tabs)
+{
+        midgard_load_store_word *word = (midgard_load_store_word *) &data;
+
+        print_ld_st_opcode(word->op);
+
+        /* Only varying ops carry interpolation qualifiers */
+        if (is_op_varying(word->op))
+                print_varying_parameters(word);
+
+        printf(" r%d", word->reg);
+        print_mask(word->mask);
+
+        int address = word->address;
+
+        if (word->op == midgard_op_load_uniform_32) {
+                /* Uniforms use their own addressing scheme */
+
+                /* The low part is taken from the varying_parameters field
+                 * shifted right by seven; the address field supplies the
+                 * bits above it */
+                int lo = word->varying_parameters >> 7;
+                int hi = word->address;
+
+                /* TODO: Combine fields logically */
+                address = (hi << 3) | lo;
+        }
+
+        printf(", %d", address);
+
+        print_swizzle(word->swizzle);
+
+        printf(", 0x%X\n", word->unknown);
+}
+
+/* Print the two instruction slots of a load/store bundle.
+ *
+ * word: first 32-bit word of the bundle, reinterpreted as a
+ *       midgard_load_store.
+ * tabs: forwarded to the instruction printer.
+ *
+ * A slot whose value is exactly 3 is skipped (presumably the encoding of
+ * an empty slot -- TODO confirm). */
+
+static void
+print_load_store_word(uint32_t *word, unsigned tabs)
+{
+        midgard_load_store *load_store = (midgard_load_store *) word;
+
+        if (load_store->word1 != 3) {
+                print_load_store_instr(load_store->word1, tabs);
+        }
+
+        if (load_store->word2 != 3) {
+                print_load_store_instr(load_store->word2, tabs);
+        }
+}
+
+/* Print a texture-pipeline register. `select` is added to REG_TEX_BASE to
+ * pick the register; a half register is printed as "hrN" with `upper`
+ * choosing the half. Setting both `full` and `upper` is reported as an
+ * error after the register name. */
+
+static void
+print_texture_reg(bool full, bool select, bool upper)
+{
+        if (!full) {
+                printf("hr%d", (REG_TEX_BASE + select) * 2 + upper);
+                return;
+        }
+
+        printf("r%d", REG_TEX_BASE + select);
+
+        if (upper)
+                printf("// error: out full / upper mutually exclusive\n");
+}
+
+/* Print the texture format as a dotted modifier: ".2d", ".3d", or
+ * ".fmt_<n>" for any other value. */
+
+static void
+print_texture_format(int format)
+{
+        /* Act like a modifier */
+        printf(".");
+
+        if (format == TEXTURE_2D)
+                printf("2d");
+        else if (format == TEXTURE_3D)
+                printf("3d");
+        else
+                printf("fmt_%d", format);
+}
+
+/* Print the texture operation as a dotted modifier: ".normal",
+ * ".texelfetch", or ".op_<n>" for any other value. The parameter holds the
+ * operation code despite being named `format`. */
+
+static void
+print_texture_op(int format)
+{
+        /* Act like a modifier */
+        printf(".");
+
+        if (format == TEXTURE_OP_NORMAL)
+                printf("normal");
+        else if (format == TEXTURE_OP_TEXEL_FETCH)
+                printf("texelfetch");
+        else
+                printf("op_%d", format);
+}
+
+#undef DEFINE_CASE
+
+/* Print one texture bundle.
+ *
+ * word: first 32-bit word of the bundle, reinterpreted as a
+ *       midgard_texture_word.
+ * tabs: unused here.
+ *
+ * The first line reads
+ * "texture.<op>.<format>[modifiers] <dest>[.mask], texture<N>,
+ *  sampler<N>[.swizzle], <coord reg>.<ab>, [<offset reg>, ][<bias>, ]".
+ * It is followed by "// ..." dump lines for any unknown or offset fields
+ * that hold unexpected values. */
+
+static void
+print_texture_word(uint32_t *word, unsigned tabs)
+{
+        midgard_texture_word *texture = (midgard_texture_word *) word;
+
+        /* Instruction family, like ALU words have theirs */
+        printf("texture");
+
+        /* Broad category of texture operation in question */
+        print_texture_op(texture->op);
+
+        /* Specific format in question */
+        print_texture_format(texture->format);
+
+        /* Instruction "modifiers" parallel the ALU instructions. First group
+         * are modifiers that act alone */
+
+        /* Note the inverted sense: ".raw" is printed when filter is clear */
+        if (!texture->filter)
+                printf(".raw");
+
+        if (texture->shadow)
+                printf(".shadow");
+
+        if (texture->cont)
+                printf(".cont");
+
+        if (texture->last)
+                printf(".last");
+
+        /* Second set are modifiers which take an extra argument each */
+
+        if (texture->has_offset)
+                printf(".offset");
+
+        if (texture->bias)
+                printf(".bias");
+
+        printf(" ");
+
+        /* Destination register and write mask */
+        print_texture_reg(texture->out_full, texture->out_reg_select, texture->out_upper);
+        print_mask(texture->mask);
+        printf(", ");
+
+        printf("texture%d, ", texture->texture_handle);
+
+        printf("sampler%d", texture->sampler_handle);
+        print_swizzle(texture->swizzle);
+        printf(", ");
+
+        /* Coordinate register: always printed as a full register, with a
+         * two-component swizzle */
+        print_texture_reg(/*texture->in_reg_full*/true, texture->in_reg_select, texture->in_reg_upper);
+        printf(".%c%c, ", "xyzw"[texture->in_reg_swizzle_left],
+               "xyzw"[texture->in_reg_swizzle_right]);
+
+        /* TODO: can offsets be full words? */
+        if (texture->has_offset) {
+                print_texture_reg(false, texture->offset_reg_select, texture->offset_reg_upper);
+                printf(", ");
+        }
+
+        /* The raw bias field is shown divided by 256 */
+        if (texture->bias)
+                printf("%f, ", texture->bias / 256.0f);
+
+        printf("\n");
+
+        /* While not zero in general, for these simple instructions the
+         * following unknowns are zero, so we don't include them */
+
+        /* If any one of them is set, all eight are dumped */
+        if (texture->unknown1 ||
+                        texture->unknown2 ||
+                        texture->unknown3 ||
+                        texture->unknown4 ||
+                        texture->unknownA ||
+                        texture->unknownB ||
+                        texture->unknown8 ||
+                        texture->unknown9) {
+                printf("// unknown1 = 0x%x\n", texture->unknown1);
+                printf("// unknown2 = 0x%x\n", texture->unknown2);
+                printf("// unknown3 = 0x%x\n", texture->unknown3);
+                printf("// unknown4 = 0x%x\n", texture->unknown4);
+                printf("// unknownA = 0x%x\n", texture->unknownA);
+                printf("// unknownB = 0x%x\n", texture->unknownB);
+                printf("// unknown8 = 0x%x\n", texture->unknown8);
+                printf("// unknown9 = 0x%x\n", texture->unknown9);
+        }
+
+        /* Similarly, if no offset is applied, these are zero. If an offset
+         * -is- applied, or gradients are used, etc, these are nonzero but
+         *  largely unknown still. */
+
+        if (texture->offset_unknown1 ||
+                        texture->offset_reg_select ||
+                        texture->offset_reg_upper ||
+                        texture->offset_unknown4 ||
+                        texture->offset_unknown5 ||
+                        texture->offset_unknown6 ||
+                        texture->offset_unknown7 ||
+                        texture->offset_unknown8 ||
+                        texture->offset_unknown9) {
+                printf("// offset_unknown1 = 0x%x\n", texture->offset_unknown1);
+                printf("// offset_reg_select = 0x%x\n", texture->offset_reg_select);
+                printf("// offset_reg_upper = 0x%x\n", texture->offset_reg_upper);
+                printf("// offset_unknown4 = 0x%x\n", texture->offset_unknown4);
+                printf("// offset_unknown5 = 0x%x\n", texture->offset_unknown5);
+                printf("// offset_unknown6 = 0x%x\n", texture->offset_unknown6);
+                printf("// offset_unknown7 = 0x%x\n", texture->offset_unknown7);
+                printf("// offset_unknown8 = 0x%x\n", texture->offset_unknown8);
+                printf("// offset_unknown9 = 0x%x\n", texture->offset_unknown9);
+        }
+
+        /* Don't blow up */
+        /* unknown7 is expected to be 1; any other value is flagged */
+        if (texture->unknown7 != 0x1)
+                printf("// (!) unknown7 = %d\n", texture->unknown7);
+}
+
+/* Disassemble a buffer of Midgard machine code to stdout.
+ *
+ * code: start of the binary; it is read as an array of 32-bit words.
+ * size: length of the buffer in bytes. Trailing bytes that do not make up a
+ *       whole 32-bit word are ignored.
+ *
+ * The low nibble of each bundle's first word selects the bundle type and
+ * its length in 128-bit quad words. Decoding stops at the end of the
+ * buffer, at a bundle that would run past the end of the buffer, or
+ * according to the prefetch rule below. */
+
+void
+disassemble_midgard(uint8_t *code, size_t size)
+{
+        uint32_t *words = (uint32_t *) code;
+        unsigned num_words = size / 4;
+        int tabs = 0;
+
+        bool prefetch_flag = false;
+
+        unsigned i = 0;
+
+        while (i < num_words) {
+                unsigned tag = words[i] & 0xF;
+                unsigned num_quad_words = midgard_word_size[tag];
+
+                bool known_type =
+                        midgard_word_types[tag] == midgard_word_type_texture ||
+                        midgard_word_types[tag] == midgard_word_type_load_store ||
+                        midgard_word_types[tag] == midgard_word_type_alu;
+
+                /* Unknown bundles are dumped as a single raw quad word */
+                if (!known_type)
+                        num_quad_words = 1;
+
+                /* The printers below dereference the bundle's words, so do
+                 * not decode a bundle that the end of the buffer cuts off */
+                if (4 * num_quad_words > num_words - i) {
+                        printf("/* truncated instruction at word %u */\n", i);
+                        return;
+                }
+
+                switch (midgard_word_types[tag]) {
+                case midgard_word_type_texture:
+                        print_texture_word(&words[i], tabs);
+                        break;
+
+                case midgard_word_type_load_store:
+                        print_load_store_word(&words[i], tabs);
+                        break;
+
+                case midgard_word_type_alu:
+                        print_alu_word(&words[i], num_quad_words, tabs);
+
+                        /* The previous bundle set the prefetch flag, so
+                         * this ALU bundle is the last one printed */
+                        if (prefetch_flag)
+                                return;
+
+                        /* Reset word static analysis state */
+                        is_embedded_constant_half = false;
+                        is_embedded_constant_int = false;
+
+                        break;
+
+                default:
+                        printf("Unknown word type %u:\n", tag);
+                        print_quad_word(&words[i], tabs);
+                        printf("\n");
+                        break;
+                }
+
+                printf("\n");
+
+                /* The high nibble of the first byte holds the tag of the
+                 * next bundle */
+                unsigned next = (words[i] & 0xF0) >> 4;
+
+                i += 4 * num_quad_words;
+
+                /* Break based on instruction prefetch flag */
+
+                if (i < num_words && next == 1) {
+                        prefetch_flag = true;
+
+                        /* Stop right away unless an ALU bundle follows; one
+                         * trailing ALU bundle is still printed (see above) */
+                        if (midgard_word_types[words[i] & 0xF] != midgard_word_type_alu)
+                                return;
+                }
+        }
+}
--- a/src/gallium/drivers/panfrost/midgard/disassemble.h
+++ b/src/gallium/drivers/panfrost/midgard/disassemble.h
@ -0,0 +1,2 @@
+#ifndef MIDGARD_DISASSEMBLE_H
+#define MIDGARD_DISASSEMBLE_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+/* Disassemble `size` bytes of Midgard machine code starting at `code`,
+ * printing the result to stdout. */
+void disassemble_midgard(uint8_t *code, size_t size);
+
+#endif
--- a/src/gallium/drivers/panfrost/midgard/helpers.h
+++ b/src/gallium/drivers/panfrost/midgard/helpers.h
@ -0,0 +1,236 @@
+/* Author(s):
+ *  Alyssa Rosenzweig
+ *
+ * Copyright (c) 2018 Alyssa Rosenzweig (alyssa@rosenzweig.io)
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/* Some constants and macros not found in the disassembler */
+
+/* True when a load/store opcode is one of the two varying stores. The argument
+ * is parenthesized so that any expression can be passed safely; note that it
+ * is still evaluated twice. */
+#define OP_IS_STORE(op) (\
+		(op) == midgard_op_store_vary_16 || \
+		(op) == midgard_op_store_vary_32 \
+	)
+
+/* ALU control words are single bit fields with a lot of space. Each enable
+ * below is one distinct bit; the UNIT_* shorthands further down this header
+ * alias the five unit enables. */
+
+#define ALU_ENAB_VEC_MUL  (1 << 17)
+#define ALU_ENAB_SCAL_ADD  (1 << 19)
+#define ALU_ENAB_VEC_ADD  (1 << 21)
+#define ALU_ENAB_SCAL_MUL  (1 << 23)
+#define ALU_ENAB_VEC_LUT  (1 << 25)
+#define ALU_ENAB_BR_COMPACT (1 << 26)
+#define ALU_ENAB_BRANCH   (1 << 27)
+
+/* Other opcode properties that don't conflict with the ALU_ENABs, non-ISA */
+
+/* Denotes an opcode that takes a vector input with a fixed-number of
+ * channels, but outputs to only a single output channel, like dot products.
+ * For these, to determine the effective mask, this quirk can be set. We have
+ * an intentional off-by-one (a la MALI_POSITIVE), since 0-channel makes no
+ * sense but we need to fit 4 channels in 2-bits. Similarly, 1-channel doesn't
+ * make sense (since then why are we quirked?), so that corresponds to "no
+ * count set".
+ *
+ * The macro arguments are parenthesized so that GET_CHANNEL_COUNT can be
+ * applied directly to an OR-ed property word: without the parentheses the
+ * `&` would bind only to the last operand of the expression passed in. */
+
+#define OP_CHANNEL_COUNT(c) (((c) - 1) << 0)
+#define GET_CHANNEL_COUNT(c) (((c) & (0x3 << 0)) ? (((c) & (0x3 << 0)) + 1) : 0)
+
+/* Vector-independent shorthands for the above; these numbers are arbitrary and
+ * not from the ISA. Convert to the above with unit_enum_to_midgard */
+
+#define UNIT_MUL 0
+#define UNIT_ADD 1
+#define UNIT_LUT 2
+
+/* 4-bit type tags. These agree with the midgard_word_types table in
+ * midgard-parse.h, and each numeric suffix equals four times the quadword
+ * count that midgard_word_size lists for the same tag. */
+
+#define TAG_TEXTURE_4 0x3
+#define TAG_LOAD_STORE_4 0x5
+#define TAG_ALU_4 0x8
+#define TAG_ALU_8 0x9
+#define TAG_ALU_12 0xA
+#define TAG_ALU_16 0xB
+
+/* Special register aliases */
+
+#define MAX_WORK_REGISTERS 16
+
+/* Uniforms begin at (REGISTER_UNIFORMS - uniform_count) */
+#define REGISTER_UNIFORMS 24
+
+#define REGISTER_UNUSED 24 /* same number as REGISTER_UNIFORMS */
+#define REGISTER_CONSTANT 26
+#define REGISTER_VARYING_BASE 26 /* same number as REGISTER_CONSTANT */
+#define REGISTER_OFFSET 27
+#define REGISTER_TEXTURE_BASE 28
+#define REGISTER_SELECT 31
+
+/* Special uniforms used for e.g. vertex epilogues. They are numbered upward
+ * from SPECIAL_UNIFORM_BASE; only the viewport is defined here. */
+#define SPECIAL_UNIFORM_BASE (1 << 24)
+#define UNIFORM_VIEWPORT (SPECIAL_UNIFORM_BASE + 0)
+
+/* SSA helper aliases to mimic the registers. UNUSED_0 encoded as an inline
+ * constant. UNUSED_1 encoded as REGISTER_UNUSED */
+
+#define SSA_UNUSED_0 0
+#define SSA_UNUSED_1 (-2)
+
+/* A fixed register is stored as (register + 1) shifted up by SSA_FIXED_SHIFT,
+ * so every fixed index is at least SSA_FIXED_MINIMUM. Arguments are
+ * parenthesized so that expressions can be passed without precedence
+ * surprises. */
+
+#define SSA_FIXED_SHIFT 24
+#define SSA_FIXED_REGISTER(reg) ((1 + (reg)) << SSA_FIXED_SHIFT)
+#define SSA_REG_FROM_FIXED(reg) (((reg) >> SSA_FIXED_SHIFT) - 1)
+#define SSA_FIXED_MINIMUM SSA_FIXED_REGISTER(0)
+
+/* Swizzle support: four 2-bit component selectors packed into one byte, with
+ * the first selector in the lowest bits. Arguments are parenthesized so that
+ * expressions can be passed as selectors. */
+
+#define SWIZZLE(A, B, C, D) (((D) << 6) | ((C) << 4) | ((B) << 2) | ((A) << 0))
+#define SWIZZLE_FROM_ARRAY(r) SWIZZLE((r)[0], (r)[1], (r)[2], (r)[3])
+#define COMPONENT_X 0x0
+#define COMPONENT_Y 0x1
+#define COMPONENT_Z 0x2
+#define COMPONENT_W 0x3
+
+/* See ISA notes. Same value as midgard_op_ld_st_noop (0x03) in midgard.h */
+
+#define LDST_NOP (3)
+
+/* Reports whether an ALU opcode is one of the integer opcodes listed below.
+ * The conversion opcodes (f2i, f2u, i2f, u2f) are not in the list; they were
+ * left commented out in the original table as well. */
+static bool
+midgard_is_integer_op(int op)
+{
+        bool is_integer;
+
+        switch (op) {
+        /* Arithmetic */
+        case midgard_alu_op_iadd:
+        case midgard_alu_op_ishladd:
+        case midgard_alu_op_isub:
+        case midgard_alu_op_imul:
+        case midgard_alu_op_imin:
+        case midgard_alu_op_imax:
+
+        /* Shifts */
+        case midgard_alu_op_iasr:
+        case midgard_alu_op_ilsr:
+        case midgard_alu_op_ishl:
+
+        /* Bitwise logic and moves */
+        case midgard_alu_op_iand:
+        case midgard_alu_op_ior:
+        case midgard_alu_op_inot:
+        case midgard_alu_op_iandnot:
+        case midgard_alu_op_ixor:
+        case midgard_alu_op_imov:
+
+        /* Comparisons and select */
+        case midgard_alu_op_ieq:
+        case midgard_alu_op_ine:
+        case midgard_alu_op_ilt:
+        case midgard_alu_op_ile:
+        case midgard_alu_op_iball_eq:
+        case midgard_alu_op_ibany_neq:
+        case midgard_alu_op_icsel:
+                is_integer = true;
+                break;
+
+        default:
+                is_integer = false;
+                break;
+        }
+
+        return is_integer;
+}
+
+/* There are five ALU units: VMUL, VADD, SMUL, SADD, LUT. A given opcode is
+ * implemented on some subset of these units (or occasionally all of them).
+ * This table encodes a bit mask of valid units for each opcode, so the
+ * scheduler can figure where to plonk the instruction. */
+
+/* Shorthands for each unit */
+#define UNIT_VMUL ALU_ENAB_VEC_MUL
+#define UNIT_SADD ALU_ENAB_SCAL_ADD
+#define UNIT_VADD ALU_ENAB_VEC_ADD
+#define UNIT_SMUL ALU_ENAB_SCAL_MUL
+#define UNIT_VLUT ALU_ENAB_VEC_LUT
+
+/* Shorthands for usual combinations of units. LUT is intentionally excluded
+ * since it's nutty. */
+
+#define UNITS_MUL (UNIT_VMUL | UNIT_SMUL)
+#define UNITS_ADD (UNIT_VADD | UNIT_SADD)
+#define UNITS_ALL (UNITS_MUL | UNITS_ADD)
+#define UNITS_SCALAR (UNIT_SADD | UNIT_SMUL)
+#define UNITS_VECTOR (UNIT_VMUL | UNIT_VADD)
+#define UNITS_ANY_VECTOR (UNITS_VECTOR | UNIT_VLUT) /* the one combination here that includes LUT */
+
+/* Per-opcode property word, indexed by midgard_alu_op. Each entry is the mask
+ * of units the opcode may run on, optionally OR-ed with a channel-count quirk
+ * (OP_CHANNEL_COUNT) in the low bits. Opcodes that are not listed keep the
+ * default value of 0.
+ *
+ * Each opcode appears exactly once: a repeated designated initializer silently
+ * overrides the earlier one and draws -Woverride-init warnings. */
+static int alu_opcode_props[256] = {
+        [midgard_alu_op_fadd]		 = UNITS_ADD,
+        [midgard_alu_op_fmul]		 = UNITS_MUL | UNIT_VLUT,
+        [midgard_alu_op_fmin]		 = UNITS_MUL | UNITS_ADD,
+        [midgard_alu_op_fmax]		 = UNITS_MUL | UNITS_ADD,
+        [midgard_alu_op_imin]		 = UNITS_ALL,
+        [midgard_alu_op_imax]		 = UNITS_ALL,
+        [midgard_alu_op_fmov]		 = UNITS_ALL | UNIT_VLUT,
+        [midgard_alu_op_ffloor]		 = UNITS_ADD,
+        [midgard_alu_op_fceil]		 = UNITS_ADD,
+
+        /* Though they output a scalar, they need to run on a vector unit
+         * since they process vectors */
+        [midgard_alu_op_fdot3]		 = UNIT_VMUL | OP_CHANNEL_COUNT(3),
+        [midgard_alu_op_fdot4]		 = UNIT_VMUL | OP_CHANNEL_COUNT(4),
+
+        [midgard_alu_op_iadd]		 = UNITS_ADD,
+        [midgard_alu_op_isub]		 = UNITS_ADD,
+        [midgard_alu_op_imul]		 = UNITS_ALL,
+        [midgard_alu_op_imov]		 = UNITS_ALL,
+
+        /* For vector comparisons, use ball etc */
+        [midgard_alu_op_feq]		 = UNITS_ALL,
+        [midgard_alu_op_fne]		 = UNITS_ALL,
+        [midgard_alu_op_flt]		 = UNIT_SADD,
+        [midgard_alu_op_ieq]		 = UNITS_ALL,
+        [midgard_alu_op_ine]		 = UNITS_ALL,
+        [midgard_alu_op_ilt]		 = UNITS_ALL,
+        [midgard_alu_op_ile]		 = UNITS_ALL,
+
+        [midgard_alu_op_icsel]		 = UNITS_ADD,
+        [midgard_alu_op_fcsel]		 = UNITS_ADD | UNIT_SMUL,
+
+        [midgard_alu_op_frcp]		 = UNIT_VLUT,
+        [midgard_alu_op_frsqrt]		 = UNIT_VLUT,
+        [midgard_alu_op_fsqrt]		 = UNIT_VLUT,
+        [midgard_alu_op_fexp2]		 = UNIT_VLUT,
+        [midgard_alu_op_flog2]		 = UNIT_VLUT,
+
+        [midgard_alu_op_f2i]		 = UNITS_ADD,
+        [midgard_alu_op_f2u]		 = UNITS_ADD,
+        [midgard_alu_op_f2u8]		 = UNITS_ADD,
+        [midgard_alu_op_i2f]		 = UNITS_ADD,
+        [midgard_alu_op_u2f]		 = UNITS_ADD,
+
+        [midgard_alu_op_fsin]		 = UNIT_VLUT,
+        [midgard_alu_op_fcos]		 = UNIT_VLUT,
+
+        [midgard_alu_op_iand]		 = UNITS_ADD, /* XXX: Test case where it's right on smul but not sadd */
+        [midgard_alu_op_ior]		 = UNITS_ADD,
+        [midgard_alu_op_ixor]		 = UNITS_ADD,
+        [midgard_alu_op_inot]		 = UNITS_ALL,
+        [midgard_alu_op_ishl]		 = UNITS_ADD,
+        [midgard_alu_op_iasr]		 = UNITS_ADD,
+        [midgard_alu_op_ilsr]		 = UNITS_ADD,
+
+        [midgard_alu_op_fball_eq]	 = UNITS_ALL,
+        [midgard_alu_op_fbany_neq]	 = UNITS_ALL,
+        [midgard_alu_op_iball_eq]	 = UNITS_ALL,
+        [midgard_alu_op_ibany_neq]	 = UNITS_ALL
+};
--- a/src/gallium/drivers/panfrost/midgard/midgard-parse.h
+++ b/src/gallium/drivers/panfrost/midgard/midgard-parse.h
@ -0,0 +1,70 @@
+/* Author(s):
+ *   Connor Abbott
+ *   Alyssa Rosenzweig
+ *
+ * Copyright (c) 2013 Connor Abbott (connor@abbott.cx)
+ * Copyright (c) 2018 Alyssa Rosenzweig (alyssa@rosenzweig.io)
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef __midgard_parse_h__
+#define __midgard_parse_h__
+
+/* Additional metadata for parsing Midgard binaries, not needed for compilation */
+
+/* Word type for each 4-bit tag (the low nibble of an instruction word). Every
+ * slot is spelled out: an omitted entry would default to 0, which is
+ * midgard_word_type_alu rather than "unknown". */
+static midgard_word_type midgard_word_types[16] = {
+        [0x0] = midgard_word_type_unknown,
+        [0x1] = midgard_word_type_unknown,
+        [0x2] = midgard_word_type_texture,
+        [0x3] = midgard_word_type_texture,
+        [0x4] = midgard_word_type_unknown,
+        [0x5] = midgard_word_type_load_store,
+        [0x6] = midgard_word_type_unknown,
+        [0x7] = midgard_word_type_unknown,
+        [0x8] = midgard_word_type_alu,
+        [0x9] = midgard_word_type_alu,
+        [0xA] = midgard_word_type_alu,
+        [0xB] = midgard_word_type_alu,
+        [0xC] = midgard_word_type_alu,
+        [0xD] = midgard_word_type_alu,
+        [0xE] = midgard_word_type_alu,
+        [0xF] = midgard_word_type_alu,
+};
+
+/* Instruction size, in quadwords, for each 4-bit tag. Tags that are not
+ * listed keep the implicit value 0. */
+static unsigned midgard_word_size[16] = {
+        /* Texture */
+        [0x2] = 1,
+        [0x3] = 1,
+
+        /* Load/store */
+        [0x5] = 1,
+
+        /* ALU */
+        [0x8] = 1,
+        [0x9] = 2,
+        [0xA] = 3,
+        [0xB] = 4,
+        [0xC] = 1,
+        [0xD] = 2,
+        [0xE] = 3,
+        [0xF] = 4,
+};
+
+#endif
--- a/src/gallium/drivers/panfrost/midgard/midgard.h
+++ b/src/gallium/drivers/panfrost/midgard/midgard.h
@ -0,0 +1,473 @@
+/* Author(s):
+ *   Connor Abbott
+ *   Alyssa Rosenzweig
+ *
+ * Copyright (c) 2013 Connor Abbott (connor@abbott.cx)
+ * Copyright (c) 2018 Alyssa Rosenzweig (alyssa@rosenzweig.io)
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef __midgard_h__
+#define __midgard_h__
+
+#include <stdint.h>
+#include <stdbool.h>
+
+typedef enum { /* Kinds of instruction word, as classified by midgard_word_types */
+        midgard_word_type_alu,
+        midgard_word_type_load_store,
+        midgard_word_type_texture,
+        midgard_word_type_unknown /* tag with no known meaning */
+} midgard_word_type;
+
+typedef enum { /* One enumerator per ALU unit: VMUL, SADD, SMUL, VADD, LUT */
+        midgard_alu_vmul,
+        midgard_alu_sadd,
+        midgard_alu_smul,
+        midgard_alu_vadd,
+        midgard_alu_lut
+} midgard_alu;
+
+/*
+ * ALU words
+ */
+
+typedef enum { /* ALU opcodes. Every value fits the 8-bit op field of the vector
+ * and scalar ALU words and is used to index alu_opcode_names. */
+        midgard_alu_op_fadd       = 0x10,
+        midgard_alu_op_fmul       = 0x14,
+        midgard_alu_op_fmin       = 0x28,
+        midgard_alu_op_fmax       = 0x2C,
+        midgard_alu_op_fmov       = 0x30,
+        midgard_alu_op_ffloor     = 0x36,
+        midgard_alu_op_fceil      = 0x37,
+        midgard_alu_op_fdot3      = 0x3C,
+        midgard_alu_op_fdot3r     = 0x3D,
+        midgard_alu_op_fdot4      = 0x3E,
+        midgard_alu_op_freduce    = 0x3F,
+        midgard_alu_op_iadd       = 0x40,
+        midgard_alu_op_ishladd    = 0x41,
+        midgard_alu_op_isub       = 0x46,
+        midgard_alu_op_imul       = 0x58,
+        midgard_alu_op_imin       = 0x60,
+        midgard_alu_op_imax       = 0x62,
+        midgard_alu_op_iasr       = 0x68,
+        midgard_alu_op_ilsr       = 0x69,
+        midgard_alu_op_ishl       = 0x6E,
+        midgard_alu_op_iand       = 0x70,
+        midgard_alu_op_ior        = 0x71,
+        midgard_alu_op_inot       = 0x72,
+        midgard_alu_op_iandnot    = 0x74, /* (a, b) -> a & ~b, used for not/b2f */
+        midgard_alu_op_ixor       = 0x76,
+        midgard_alu_op_imov       = 0x7B,
+        midgard_alu_op_feq        = 0x80,
+        midgard_alu_op_fne        = 0x81,
+        midgard_alu_op_flt        = 0x82,
+        midgard_alu_op_fle        = 0x83,
+        midgard_alu_op_fball_eq   = 0x88,
+        midgard_alu_op_bball_eq   = 0x89,
+        midgard_alu_op_bbany_neq  = 0x90, /* used for bvec4(1) */
+        midgard_alu_op_fbany_neq  = 0x91, /* bvec4(0) also */
+        midgard_alu_op_f2i        = 0x99,
+        midgard_alu_op_f2u8       = 0x9C,
+        midgard_alu_op_f2u        = 0x9D,
+        midgard_alu_op_ieq        = 0xA0,
+        midgard_alu_op_ine        = 0xA1,
+        midgard_alu_op_ilt        = 0xA4,
+        midgard_alu_op_ile        = 0xA5,
+        midgard_alu_op_iball_eq   = 0xA8,
+        midgard_alu_op_ball       = 0xA9,
+        midgard_alu_op_ibany_neq  = 0xB1,
+        midgard_alu_op_i2f        = 0xB8,
+        midgard_alu_op_u2f        = 0xBC,
+        midgard_alu_op_icsel      = 0xC1,
+        midgard_alu_op_fcsel      = 0xC5,
+        midgard_alu_op_fatan_pt2  = 0xE8,
+        midgard_alu_op_frcp       = 0xF0,
+        midgard_alu_op_frsqrt     = 0xF2,
+        midgard_alu_op_fsqrt      = 0xF3,
+        midgard_alu_op_fexp2      = 0xF4,
+        midgard_alu_op_flog2      = 0xF5,
+        midgard_alu_op_fsin       = 0xF6,
+        midgard_alu_op_fcos       = 0xF7,
+        midgard_alu_op_fatan2_pt1 = 0xF9,
+} midgard_alu_op;
+
+typedef enum { /* Values of the 2-bit outmod field of the vector and scalar ALU words */
+        midgard_outmod_none = 0,
+        midgard_outmod_pos  = 1, /* NOTE(review): presumably clamps to non-negative -- confirm */
+        midgard_outmod_int  = 2,
+        midgard_outmod_sat  = 3 /* NOTE(review): presumably saturates the result -- confirm */
+} midgard_outmod;
+
+typedef enum { /* Values of the 2-bit reg_mode field of midgard_vector_alu */
+        midgard_reg_mode_quarter = 0,
+        midgard_reg_mode_half = 1,
+        midgard_reg_mode_full = 2,
+        midgard_reg_mode_double = 3 /* TODO: verify */
+} midgard_reg_mode;
+
+typedef enum { /* Values of the 2-bit dest_override field of midgard_vector_alu */
+        midgard_dest_override_lower = 0,
+        midgard_dest_override_upper = 1,
+        midgard_dest_override_none = 2
+} midgard_dest_override;
+
+typedef struct /* Vector source descriptor; 13 bits, the width of src1/src2 in midgard_vector_alu */
+__attribute__((__packed__))
+{
+        bool abs         : 1;
+        bool negate      : 1;
+
+        /* replicate lower half if dest = half, or low/high half selection if
+         * dest = full
+         */
+        bool rep_low     : 1;
+        bool rep_high    : 1; /* unused if dest = full */
+        bool half        : 1; /* only matters if dest = full */
+        unsigned swizzle : 8;
+}
+midgard_vector_alu_src;
+
+typedef struct /* Vector ALU instruction; the fields add up to 48 bits */
+__attribute__((__packed__))
+{
+        midgard_alu_op op               :  8;
+        midgard_reg_mode reg_mode   :  2;
+        unsigned src1 : 13; /* same width as midgard_vector_alu_src */
+        unsigned src2 : 13; /* same width as midgard_vector_alu_src */
+        midgard_dest_override dest_override : 2;
+        midgard_outmod outmod               : 2;
+        unsigned mask                           : 8;
+}
+midgard_vector_alu;
+
+typedef struct /* Scalar source descriptor; 6 bits, the width of src1 in midgard_scalar_alu */
+__attribute__((__packed__))
+{
+        bool abs           : 1;
+        bool negate        : 1;
+        bool full          : 1; /* 0 = half, 1 = full */
+        unsigned component : 3;
+}
+midgard_scalar_alu_src;
+
+typedef struct /* Scalar ALU instruction; the fields add up to 32 bits */
+__attribute__((__packed__))
+{
+        midgard_alu_op op         :  8;
+        unsigned src1             :  6; /* same width as midgard_scalar_alu_src */
+        unsigned src2             : 11; /* NOTE(review): wider than src1, presumably so it can
+                                         * also carry an immediate -- confirm */
+        unsigned unknown          :  1;
+        midgard_outmod outmod :  2;
+        bool output_full          :  1;
+        unsigned output_component :  3;
+}
+midgard_scalar_alu;
+
+typedef struct /* Register selection; three 5-bit register numbers plus one flag, 16 bits */
+__attribute__((__packed__))
+{
+        unsigned src1_reg : 5;
+        unsigned src2_reg : 5;
+        unsigned out_reg  : 5;
+        bool src2_imm     : 1; /* NOTE(review): presumably marks src2 as an immediate -- confirm */
+}
+midgard_reg_info;
+
+typedef enum { /* Values of the 3-bit op field shared by the branch and writeout words */
+        midgard_jmp_writeout_op_branch_uncond = 1,
+        midgard_jmp_writeout_op_branch_cond = 2,
+        midgard_jmp_writeout_op_discard = 4,
+        midgard_jmp_writeout_op_writeout = 7,
+} midgard_jmp_writeout_op;
+
+typedef enum { /* Values of the 2-bit cond field of midgard_branch_cond */
+        midgard_condition_write0 = 0,
+        midgard_condition_false = 1,
+        midgard_condition_true = 2,
+        midgard_condition_always = 3, /* Special for writeout/uncond discard */
+} midgard_condition;
+
+typedef struct /* Compact unconditional branch; the fields add up to 16 bits */
+__attribute__((__packed__))
+{
+        midgard_jmp_writeout_op op : 3; /* == branch_uncond */
+        unsigned dest_tag : 4; /* tag of branch destination */
+        unsigned unknown : 2;
+        int offset : 7; /* signed */
+}
+midgard_branch_uncond;
+
+typedef struct /* Compact conditional branch; the fields add up to 16 bits */
+__attribute__((__packed__))
+{
+        midgard_jmp_writeout_op op : 3; /* == branch_cond */
+        unsigned dest_tag : 4; /* tag of branch destination */
+        int offset : 7; /* signed */
+        midgard_condition cond : 2;
+}
+midgard_branch_cond;
+
+typedef struct /* Extended branch; the fields add up to 48 bits */
+__attribute__((__packed__))
+{
+        midgard_jmp_writeout_op op : 3; /* == branch_cond
+                                         * NOTE(review): same wording as midgard_branch_cond;
+                                         * confirm it is accurate for the extended form */
+        unsigned dest_tag : 4; /* tag of branch destination */
+        unsigned unknown : 2;
+        signed offset : 7;
+        unsigned zero : 16;
+        unsigned cond : 16; /* 16 bits here, versus the 2-bit cond of midgard_branch_cond */
+}
+midgard_branch_extended;
+
+typedef struct /* Writeout word; the fields add up to 16 bits */
+__attribute__((__packed__))
+{
+        midgard_jmp_writeout_op op : 3; /* == writeout */
+        unsigned unknown : 13;
+}
+midgard_writeout;
+
+/*
+ * Load/store words
+ */
+
+typedef enum { /* Load/store opcodes; values fit the 8-bit op field of midgard_load_store_word */
+        midgard_op_ld_st_noop   = 0x03, /* has no entry in load_store_opcode_names */
+        midgard_op_load_attr_16 = 0x95,
+        midgard_op_load_attr_32 = 0x94,
+        midgard_op_load_vary_16 = 0x99,
+        midgard_op_load_vary_32 = 0x98,
+        midgard_op_load_color_buffer_16 = 0x9D,
+        midgard_op_load_color_buffer_8 = 0xBA,
+        midgard_op_load_uniform_16 = 0xAC,
+        midgard_op_load_uniform_32 = 0xB0,
+        midgard_op_store_vary_16 = 0xD5,
+        midgard_op_store_vary_32 = 0xD4
+} midgard_load_store_op;
+
+typedef enum { /* Values of the 2-bit interpolation field of midgard_varying_parameter */
+        midgard_interp_centroid = 1,
+        midgard_interp_default = 2
+} midgard_interpolation;
+
+typedef struct /* 10 bits, the width of varying_parameters in midgard_load_store_word */
+__attribute__((__packed__))
+{
+        unsigned zero1 : 4; /* Always zero */
+
+        /* Varying qualifiers, zero if not a varying */
+        unsigned flat    : 1;
+        unsigned is_varying : 1; /* Always one for varying, but maybe something else? */
+        midgard_interpolation interpolation : 2;
+
+        unsigned zero2 : 2; /* Always zero */
+}
+midgard_varying_parameter;
+
+typedef struct /* One load/store instruction; 60 bits, the width of word1/word2 in midgard_load_store */
+__attribute__((__packed__))
+{
+        midgard_load_store_op op : 8;
+        unsigned reg     : 5;
+        unsigned mask    : 4;
+        unsigned swizzle : 8;
+        unsigned unknown : 16;
+
+        unsigned varying_parameters : 10; /* same width as midgard_varying_parameter */
+
+        unsigned address : 9;
+}
+midgard_load_store_word;
+
+typedef struct /* Load/store bundle: two tags plus two 60-bit words, 128 bits in total */
+__attribute__((__packed__))
+{
+        unsigned type      : 4;
+        unsigned next_type : 4;
+        uint64_t word1     : 60; /* same width as midgard_load_store_word */
+        uint64_t word2     : 60; /* same width as midgard_load_store_word */
+}
+midgard_load_store;
+
+/* Texture pipeline results are in r28-r29. Same number as
+ * REGISTER_TEXTURE_BASE in helpers.h. */
+#define REG_TEX_BASE 28
+
+/* Texture opcodes... maybe? */
+#define TEXTURE_OP_NORMAL 0x11
+#define TEXTURE_OP_TEXEL_FETCH 0x14
+
+/* Texture format types, found in format */
+#define TEXTURE_CUBE 0x00
+#define TEXTURE_2D 0x02
+#define TEXTURE_3D 0x03
+
+typedef struct /* Texture instruction; the fields add up to 128 bits */
+__attribute__((__packed__))
+{
+        unsigned type      : 4;
+        unsigned next_type : 4;
+
+        unsigned op  : 6; /* see the TEXTURE_OP_* defines */
+        unsigned shadow    : 1;
+        unsigned unknown3  : 1;
+
+        /* A little obscure, but last is set for the last texture operation in
+         * a shader. cont appears to just be last's opposite (?). Yeah, I know,
+         * kind of funky.. BiOpen thinks it could do with memory hinting, or
+         * tile locking? */
+
+        unsigned cont  : 1;
+        unsigned last  : 1;
+
+        unsigned format    : 5; /* see the TEXTURE_CUBE / TEXTURE_2D / TEXTURE_3D defines */
+        unsigned has_offset : 1;
+
+        /* Like in Bifrost */
+        unsigned filter  : 1;
+
+        unsigned in_reg_select : 1;
+        unsigned in_reg_upper  : 1;
+
+        unsigned in_reg_swizzle_left : 2;
+        unsigned in_reg_swizzle_right : 2;
+
+        unsigned unknown1 : 2;
+
+        unsigned unknown8  : 4;
+
+        unsigned out_full  : 1;
+
+        /* Always 1 afaict... */
+        unsigned unknown7  : 2;
+
+        unsigned out_reg_select : 1;
+        unsigned out_upper : 1;
+
+        unsigned mask : 4;
+
+        unsigned unknown2  : 2;
+
+        unsigned swizzle  : 8;
+        unsigned unknown4  : 8;
+
+        unsigned unknownA  : 4;
+
+        unsigned offset_unknown1  : 1;
+        unsigned offset_reg_select : 1;
+        unsigned offset_reg_upper : 1;
+        unsigned offset_unknown4  : 1;
+        unsigned offset_unknown5  : 1;
+        unsigned offset_unknown6  : 1;
+        unsigned offset_unknown7  : 1;
+        unsigned offset_unknown8  : 1;
+        unsigned offset_unknown9  : 1;
+
+        unsigned unknownB  : 3;
+
+        /* Texture bias or LOD, depending on whether it is executed in a
+         * fragment/vertex shader respectively. Compute as int(2^8 * biasf).
+         *
+         * For texel fetch, this is the LOD as is. */
+        unsigned bias  : 8;
+
+        unsigned unknown9  : 8;
+
+        unsigned texture_handle : 16;
+        unsigned sampler_handle : 16;
+}
+midgard_texture_word;
+
+/* Printable mnemonic for each ALU opcode, indexed by midgard_alu_op and listed
+ * in ascending opcode order. Slots without an opcode stay NULL. */
+
+static char *alu_opcode_names[256] = {
+        /* 0x10 - 0x3F: float arithmetic, moves, rounding, dot products */
+        [midgard_alu_op_fadd]       = "fadd",
+        [midgard_alu_op_fmul]       = "fmul",
+        [midgard_alu_op_fmin]       = "fmin",
+        [midgard_alu_op_fmax]       = "fmax",
+        [midgard_alu_op_fmov]       = "fmov",
+        [midgard_alu_op_ffloor]     = "ffloor",
+        [midgard_alu_op_fceil]      = "fceil",
+        [midgard_alu_op_fdot3]      = "fdot3",
+        [midgard_alu_op_fdot3r]     = "fdot3r",
+        [midgard_alu_op_fdot4]      = "fdot4",
+        [midgard_alu_op_freduce]    = "freduce",
+
+        /* 0x40 - 0x7B: integer arithmetic, shifts, bitwise logic, moves */
+        [midgard_alu_op_iadd]       = "iadd",
+        [midgard_alu_op_ishladd]    = "ishladd",
+        [midgard_alu_op_isub]       = "isub",
+        [midgard_alu_op_imul]       = "imul",
+        [midgard_alu_op_imin]       = "imin",
+        [midgard_alu_op_imax]       = "imax",
+        [midgard_alu_op_iasr]       = "iasr",
+        [midgard_alu_op_ilsr]       = "ilsr",
+        [midgard_alu_op_ishl]       = "ishl",
+        [midgard_alu_op_iand]       = "iand",
+        [midgard_alu_op_ior]        = "ior",
+        [midgard_alu_op_inot]       = "inot",
+        [midgard_alu_op_iandnot]    = "iandnot",
+        [midgard_alu_op_ixor]       = "ixor",
+        [midgard_alu_op_imov]       = "imov",
+
+        /* 0x80 - 0x9D: float/bool comparisons, float-to-int conversions */
+        [midgard_alu_op_feq]        = "feq",
+        [midgard_alu_op_fne]        = "fne",
+        [midgard_alu_op_flt]        = "flt",
+        [midgard_alu_op_fle]        = "fle",
+        [midgard_alu_op_fball_eq]   = "fball_eq",
+        [midgard_alu_op_bball_eq]   = "bball_eq",
+        [midgard_alu_op_bbany_neq]  = "bbany_neq",
+        [midgard_alu_op_fbany_neq]  = "fbany_neq",
+        [midgard_alu_op_f2i]        = "f2i",
+        [midgard_alu_op_f2u8]       = "f2u8",
+        [midgard_alu_op_f2u]        = "f2u",
+
+        /* 0xA0 - 0xC5: integer comparisons, int-to-float conversions, selects */
+        [midgard_alu_op_ieq]        = "ieq",
+        [midgard_alu_op_ine]        = "ine",
+        [midgard_alu_op_ilt]        = "ilt",
+        [midgard_alu_op_ile]        = "ile",
+        [midgard_alu_op_iball_eq]   = "iball_eq",
+        [midgard_alu_op_ball]       = "ball",
+        [midgard_alu_op_ibany_neq]  = "ibany_neq",
+        [midgard_alu_op_i2f]        = "i2f",
+        [midgard_alu_op_u2f]        = "u2f",
+        [midgard_alu_op_icsel]      = "icsel",
+        [midgard_alu_op_fcsel]      = "fcsel",
+
+        /* 0xE8 - 0xF9: transcendental helpers */
+        [midgard_alu_op_fatan_pt2]  = "fatan_pt2",
+        [midgard_alu_op_frcp]       = "frcp",
+        [midgard_alu_op_frsqrt]     = "frsqrt",
+        [midgard_alu_op_fsqrt]      = "fsqrt",
+        [midgard_alu_op_fexp2]      = "fexp2",
+        [midgard_alu_op_flog2]      = "flog2",
+        [midgard_alu_op_fsin]       = "fsin",
+        [midgard_alu_op_fcos]       = "fcos",
+        [midgard_alu_op_fatan2_pt1] = "fatan2_pt1"
+};
+
+/* Printable mnemonic for each load/store opcode, indexed by
+ * midgard_load_store_op. Opcodes without an entry here (including
+ * midgard_op_ld_st_noop) stay NULL. */
+static char *load_store_opcode_names[256] = {
+        /* Loads */
+        [midgard_op_load_attr_16]         = "ld_attr_16",
+        [midgard_op_load_attr_32]         = "ld_attr_32",
+        [midgard_op_load_vary_16]         = "ld_vary_16",
+        [midgard_op_load_vary_32]         = "ld_vary_32",
+        [midgard_op_load_color_buffer_16] = "ld_color_buffer_16",
+        [midgard_op_load_color_buffer_8]  = "ld_color_buffer_8",
+        [midgard_op_load_uniform_16]      = "ld_uniform_16",
+        [midgard_op_load_uniform_32]      = "ld_uniform_32",
+
+        /* Stores */
+        [midgard_op_store_vary_16]        = "st_vary_16",
+        [midgard_op_store_vary_32]        = "st_vary_32"
+};
+
+#endif
--- a/src/gallium/drivers/panfrost/midgard/midgard_compile.c
+++ b/src/gallium/drivers/panfrost/midgard/midgard_compile.c
--- a/src/gallium/drivers/panfrost/midgard/midgard_compile.h
+++ b/src/gallium/drivers/panfrost/midgard/midgard_compile.h
@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2018 Alyssa Rosenzweig <alyssa@rosenzweig.io>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+
+#ifndef __midgard_compile_h__
+#define __midgard_compile_h__
+
+/* This header defines an anonymous typedef struct and a static const object,
+ * so it is guarded against double inclusion in the same way as midgard.h and
+ * midgard-parse.h. */
+
+#include "compiler/nir/nir.h"
+#include "util/u_dynarray.h"
+
+/* Define the general compiler entry point */
+
+/* Description of one compiled program, filled in through
+ * midgard_compile_shader_nir. Fields marked IN are supplied by the caller. */
+typedef struct {
+        int work_register_count;
+        int uniform_count;
+        int uniform_cutoff;
+
+        int attribute_count;
+        int varying_count;
+
+        /* Boolean properties of the program */
+        bool can_discard;
+        bool writes_point_size;
+
+        int first_tag;
+
+        /* NOTE(review): presumably holds the emitted machine code -- confirm
+         * in midgard_compile.c */
+        struct util_dynarray compiled;
+
+        /* For a blend shader using a constant color -- patch point. If
+         * negative, there's no constant. */
+
+        int blend_patch_offset;
+
+        /* IN: For a fragment shader with a lowered alpha test, the ref value */
+        float alpha_ref;
+} midgard_program;
+
+/* Compiles the NIR shader `nir`, recording the result in `program`; `is_blend`
+ * selects blend-shader compilation.
+ * NOTE(review): the meaning of the int return value is not visible from this
+ * header -- confirm in midgard_compile.c. */
+int
+midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_blend);
+
+/* NIR options are shared between the standalone compiler and the online
+ * compiler. Defining it here is the simplest, though maybe not the Right
+ * solution. */
+
+static const nir_shader_compiler_options midgard_nir_options = {
+        .lower_ffma = true,
+        .lower_sub = true,
+        .lower_fpow = true,
+        .lower_scmp = true,
+        .lower_flrp32 = true,
+        .lower_flrp64 = true,
+        .lower_ffract = true,
+        .lower_fmod32 = true,
+        .lower_fmod64 = true,
+        .lower_fdiv = true,
+        .lower_idiv = true,
+
+        .vertex_id_zero_based = true,
+        .lower_extract_byte = true,
+        .lower_extract_word = true,
+
+        .native_integers = true
+};
+
+#endif
--- a/src/gallium/drivers/panfrost/midgard/midgard_nir.h
+++ b/src/gallium/drivers/panfrost/midgard/midgard_nir.h
@ -0,0 +1,5 @@
+#include <stdbool.h>
+#include "nir.h"
+
+bool midgard_nir_lower_algebraic(nir_shader *shader); /* NOTE(review): presumably
+ * generated from the `algebraic` rule list in midgard_nir_algebraic.py --
+ * confirm against that script's main() */
+bool midgard_nir_scale_trig(nir_shader *shader); /* NOTE(review): presumably
+ * generated from the `scale_trig` rule list in midgard_nir_algebraic.py, whose
+ * comment says the pass must be run only once -- confirm */
--- a/src/gallium/drivers/panfrost/midgard/midgard_nir_algebraic.py
+++ b/src/gallium/drivers/panfrost/midgard/midgard_nir_algebraic.py
@ -0,0 +1,71 @@
+#
+# Copyright (C) 2018 Alyssa Rosenzweig
+#
+# Copyright (C) 2016 Intel Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+
+import argparse
+import sys
+import math
+
+# Placeholder names used as operands in the patterns below.
+a = 'a'
+b = 'b'
+
+# (search, replace) expression pairs rendered into the
+# midgard_nir_lower_algebraic pass.
+algebraic = [
+    (('b2i32', a), ('iand@32', "a@32", 1)),
+    (('isign', a), ('imin', ('imax', a, -1), 1)),
+    # NOTE(review): fge(a, b) means a >= b, whereas flt(b, a) means b < a,
+    # i.e. a > b. The two disagree when a == b -- confirm that this
+    # approximation is intended, or handle fge in the backend instead.
+    (('fge', a, b), ('flt', b, a)),
+
+    # XXX: We have hw ops for this, just unknown atm..
+    #(('fsign@32', a), ('i2f32@32', ('isign', ('f2i32@32', ('fmul', a, 0x43800000)))))
+    #(('fsign', a), ('fcsel', ('fge', a, 0), 1.0, ('fcsel', ('flt', a, 0.0), -1.0, 0.0)))
+    # The active rule can only produce 1.0 or -1.0; unlike the fcsel variant
+    # directly above it never yields 0.0.
+    (('fsign', a), ('bcsel', ('fge', a, 0), 1.0, -1.0)),
+]
+
+# Midgard scales fsin/fcos arguments by pi.
+# Pass must be run only once, after the main loop
+
+scale_trig = [
+        (('fsin', a), ('fsin', ('fdiv', a, math.pi))),
+        (('fcos', a), ('fcos', ('fdiv', a, math.pi))),
+]
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-p', '--import-path', required=True)
+    args = parser.parse_args()
+    sys.path.insert(0, args.import_path)
+    run()
+
+
+def run():
+    import nir_algebraic  # pylint: disable=import-error
+
+    print('#include "midgard_nir.h"')
+    print(nir_algebraic.AlgebraicPass("midgard_nir_lower_algebraic",
+                                      algebraic).render())
+
+    print(nir_algebraic.AlgebraicPass("midgard_nir_scale_trig",
+                                      scale_trig).render())
+
+
+# Only generate output when executed as a script, not when imported.
+if __name__ == '__main__':
+    main()