panfrost/midgard: Add load/store opcodes

This commit adds a bunch of new load/store opcodes, largely related to OpenCL, as well as adjusting the name of existing opcodes to be more uniform. The immediate effect is compute shaders are substantially easier to interpret now. Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
2019-05-14 04:11:36 +00:00 · 2019-05-14 04:11:36 +00:00 · 74ab80b92d
parent f73c0b73ec
commit 74ab80b92d
4 changed files with 129 additions and 50 deletions
--- a/src/gallium/drivers/panfrost/midgard/disassemble.c
+++ b/src/gallium/drivers/panfrost/midgard/disassemble.c
@ -855,10 +855,10 @@ static bool
 is_op_varying(unsigned op)
 {
        switch (op) {
-        case midgard_op_store_vary_16:
-        case midgard_op_store_vary_32:
-        case midgard_op_load_vary_16:
-        case midgard_op_load_vary_32:
+        case midgard_op_st_vary_16:
+        case midgard_op_st_vary_32:
+        case midgard_op_ld_vary_16:
+        case midgard_op_ld_vary_32:
                return true;
        }

@ -881,7 +881,7 @@ print_load_store_instr(uint64_t data,

        int address = word->address;

-        if (word->op == midgard_op_load_uniform_32) {
+        if (word->op == midgard_op_ld_uniform_32) {
                /* Uniforms use their own addressing scheme */

                int lo = word->varying_parameters >> 7;
--- a/src/gallium/drivers/panfrost/midgard/helpers.h
+++ b/src/gallium/drivers/panfrost/midgard/helpers.h
@ -23,13 +23,13 @@
 */

 #define OP_IS_STORE_VARY(op) (\
-		op == midgard_op_store_vary_16 || \
-		op == midgard_op_store_vary_32 \
+		op == midgard_op_st_vary_16 || \
+		op == midgard_op_st_vary_32 \
 	)

 #define OP_IS_STORE(op) (\
                OP_IS_STORE_VARY(op) || \
-                op == midgard_op_store_cubemap_coords \
+                op == midgard_op_st_cubemap_coords \
 	)

 #define OP_IS_MOVE(op) ( \
--- a/src/gallium/drivers/panfrost/midgard/midgard.h
+++ b/src/gallium/drivers/panfrost/midgard/midgard.h
@ -345,20 +345,63 @@ typedef enum {
        /* Unclear why this is on the L/S unit, but (with an address of 0,
         * appropriate swizzle, magic constant 0x24, and xy mask?) moves fp32 cube
         * map coordinates in r27 to its cube map texture coordinate
-         * destination (e.g r29). 0x4 magic for loading from fp16 instead */
+         * destination (e.g r29). 0x4 magic for loading from fp16 instead */

-        midgard_op_store_cubemap_coords = 0x0E,
+        midgard_op_st_cubemap_coords = 0x0E,

-        midgard_op_load_attr_16 = 0x95,
-        midgard_op_load_attr_32 = 0x94,
-        midgard_op_load_vary_16 = 0x99,
-        midgard_op_load_vary_32 = 0x98,
-        midgard_op_load_color_buffer_16 = 0x9D,
-        midgard_op_load_color_buffer_8 = 0xBA,
-        midgard_op_load_uniform_16 = 0xAC,
-        midgard_op_load_uniform_32 = 0xB0,
-        midgard_op_store_vary_16 = 0xD5,
-        midgard_op_store_vary_32 = 0xD4
+        /* Used in OpenCL. Probably can load other things as well */
+        midgard_op_ld_global_id = 0x10,
+
+        /* val in r27.y, address embedded, outputs result to argument. Invert val for sub. Let val = +-1 for inc/dec. */
+        midgard_op_atomic_add = 0x40,
+        midgard_op_atomic_and = 0x44,
+        midgard_op_atomic_or = 0x48,
+        midgard_op_atomic_xor = 0x4C,
+
+        midgard_op_atomic_imin = 0x50,
+        midgard_op_atomic_umin = 0x54,
+        midgard_op_atomic_imax = 0x58,
+        midgard_op_atomic_umax = 0x5C,
+
+        midgard_op_atomic_xchg = 0x60,
+
+        /* Used for compute shader's __global arguments, __local variables (or
+         * for register spilling) */
+
+        midgard_op_ld_char = 0x81,
+        midgard_op_ld_char2 = 0x84,
+        midgard_op_ld_short = 0x85,
+        midgard_op_ld_char4 = 0x88, /* short2, int, float */
+        midgard_op_ld_short4 = 0x8C, /* int2, float2, long */
+        midgard_op_ld_int4 = 0x90, /* float4, long2 */
+
+        midgard_op_ld_attr_32 = 0x94,
+        midgard_op_ld_attr_16 = 0x95,
+        midgard_op_ld_attr_32i = 0x97,
+        midgard_op_ld_vary_32 = 0x98,
+        midgard_op_ld_vary_16 = 0x99,
+        midgard_op_ld_vary_32i = 0x9B,
+        midgard_op_ld_color_buffer_16 = 0x9D,
+
+        midgard_op_ld_uniform_16 = 0xAC,
+
+        midgard_op_ld_uniform_32 = 0xB0,
+        midgard_op_ld_color_buffer_8 = 0xBA,
+
+        midgard_op_st_char = 0xC0,
+        midgard_op_st_char2 = 0xC4, /* short */
+        midgard_op_st_char4 = 0xC8, /* short2, int, float */
+        midgard_op_st_short4 = 0xCC, /* int2, float2, long */
+        midgard_op_st_int4 = 0xD0, /* float4, long2 */
+
+        midgard_op_st_vary_32 = 0xD4,
+        midgard_op_st_vary_16 = 0xD5,
+        midgard_op_st_vary_32i = 0xD7,
+
+        /* Value to store in r27, location r26.w as short2 */
+        midgard_op_st_image_f = 0xD8,
+        midgard_op_st_image_ui = 0xDA,
+        midgard_op_st_image_i = 0xDB,
 } midgard_load_store_op;

 typedef enum {
@ -494,17 +537,53 @@ __attribute__((__packed__))
 midgard_texture_word;

 static char *load_store_opcode_names[256] = {
-        [midgard_op_store_cubemap_coords] = "st_cubemap_coords",
-        [midgard_op_load_attr_16] = "ld_attr_16",
-        [midgard_op_load_attr_32] = "ld_attr_32",
-        [midgard_op_load_vary_16] = "ld_vary_16",
-        [midgard_op_load_vary_32] = "ld_vary_32",
-        [midgard_op_load_uniform_16] = "ld_uniform_16",
-        [midgard_op_load_uniform_32] = "ld_uniform_32",
-        [midgard_op_load_color_buffer_8] = "ld_color_buffer_8",
-        [midgard_op_load_color_buffer_16] = "ld_color_buffer_16",
-        [midgard_op_store_vary_16] = "st_vary_16",
-        [midgard_op_store_vary_32] = "st_vary_32"
+        [midgard_op_st_cubemap_coords] = "st_cubemap_coords",
+        [midgard_op_ld_global_id] = "ld_global_id",
+
+        [midgard_op_atomic_add] = "atomic_add",
+        [midgard_op_atomic_and] = "atomic_and",
+        [midgard_op_atomic_or] = "atomic_or",
+        [midgard_op_atomic_xor] = "atomic_xor",
+        [midgard_op_atomic_imin] = "atomic_imin",
+        [midgard_op_atomic_umin] = "atomic_umin",
+        [midgard_op_atomic_imax] = "atomic_imax",
+        [midgard_op_atomic_umax] = "atomic_umax",
+        [midgard_op_atomic_xchg] = "atomic_xchg",
+
+        [midgard_op_ld_char] = "ld_char",
+        [midgard_op_ld_char2] = "ld_char2",
+        [midgard_op_ld_short] = "ld_short",
+        [midgard_op_ld_char4] = "ld_char4",
+        [midgard_op_ld_short4] = "ld_short4",
+        [midgard_op_ld_int4] = "ld_int4",
+
+        [midgard_op_ld_attr_32] = "ld_attr_32",
+        [midgard_op_ld_attr_16] = "ld_attr_16",
+        [midgard_op_ld_attr_32i] = "ld_attr_32i",
+
+        [midgard_op_ld_vary_32] = "ld_vary_32",
+        [midgard_op_ld_vary_16] = "ld_vary_16",
+        [midgard_op_ld_vary_32i] = "ld_vary_32i",
+
+        [midgard_op_ld_color_buffer_16] = "ld_color_buffer_16",
+
+        [midgard_op_ld_uniform_16] = "ld_uniform_16",
+        [midgard_op_ld_uniform_32] = "ld_uniform_32",
+        [midgard_op_ld_color_buffer_8] = "ld_color_buffer_8",
+
+        [midgard_op_st_char] = "st_char",
+        [midgard_op_st_char2] = "st_char2",
+        [midgard_op_st_char4] = "st_char4",
+        [midgard_op_st_short4] = "st_short4",
+        [midgard_op_st_int4] = "st_int4",
+
+        [midgard_op_st_vary_32] = "st_vary_32",
+        [midgard_op_st_vary_16] = "st_vary_16",
+        [midgard_op_st_vary_32i] = "st_vary_32i",
+
+        [midgard_op_st_image_f] = "st_image_f",
+        [midgard_op_st_image_ui] = "st_image_ui",
+        [midgard_op_st_image_i] = "st_image_i",
 };

 #endif
--- a/src/gallium/drivers/panfrost/midgard/midgard_compile.c
+++ b/src/gallium/drivers/panfrost/midgard/midgard_compile.c
@ -323,16 +323,16 @@ v_fmov(unsigned src, midgard_vector_alu_src mod, unsigned dest)
 * don't support half-floats -- this requires changes in other parts of the
 * compiler -- therefore the 16-bit versions are commented out. */

-//M_LOAD(load_attr_16);
-M_LOAD(load_attr_32);
-//M_LOAD(load_vary_16);
-M_LOAD(load_vary_32);
-//M_LOAD(load_uniform_16);
-M_LOAD(load_uniform_32);
-M_LOAD(load_color_buffer_8);
-//M_STORE(store_vary_16);
-M_STORE(store_vary_32);
-M_STORE(store_cubemap_coords);
+//M_LOAD(ld_attr_16);
+M_LOAD(ld_attr_32);
+//M_LOAD(ld_vary_16);
+M_LOAD(ld_vary_32);
+//M_LOAD(ld_uniform_16);
+M_LOAD(ld_uniform_32);
+M_LOAD(ld_color_buffer_8);
+//M_STORE(st_vary_16);
+M_STORE(st_vary_32);
+M_STORE(st_cubemap_coords);

 static midgard_instruction
 v_alu_br_compact_cond(midgard_jmp_writeout_op op, unsigned tag, signed offset, unsigned cond)
@ -1487,7 +1487,7 @@ emit_uniform_read(compiler_context *ctx, unsigned dest, unsigned offset, nir_src
                 * higher-indexed uniforms, at a performance cost. More
                 * generally, we're emitting a UBO read instruction. */

-                midgard_instruction ins = m_load_uniform_32(dest, offset);
+                midgard_instruction ins = m_ld_uniform_32(dest, offset);

                /* TODO: Don't split */
                ins.load_store.varying_parameters = (offset & 7) << 7;
@ -1560,7 +1560,7 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
                        /* XXX: Half-floats? */
                        /* TODO: swizzle, mask */

-                        midgard_instruction ins = m_load_vary_32(reg, offset);
+                        midgard_instruction ins = m_ld_vary_32(reg, offset);

                        midgard_varying_parameter p = {
                                .is_varying = 1,
@ -1615,7 +1615,7 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
                        } else if (out->data.location == VARYING_SLOT_COL1) {
                                /* Destination color must be read from framebuffer */

-                                midgard_instruction ins = m_load_color_buffer_8(reg, 0);
+                                midgard_instruction ins = m_ld_color_buffer_8(reg, 0);
                                ins.load_store.swizzle = 0; /* xxxx */

                                /* Read each component sequentially */
@ -1682,7 +1682,7 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
                                assert(0);
                        }
                } else if (ctx->stage == MESA_SHADER_VERTEX) {
-                        midgard_instruction ins = m_load_attr_32(reg, offset);
+                        midgard_instruction ins = m_ld_attr_32(reg, offset);
                        ins.load_store.unknown = 0x1E1E; /* XXX: What is this? */
                        ins.load_store.mask = (1 << instr->num_components) - 1;
                        emit_mir_instruction(ctx, ins);
@ -1745,7 +1745,7 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
                                attach_constants(ctx, &ins, constant_value, reg + 1);
                                emit_mir_instruction(ctx, ins);

-                                midgard_instruction st = m_store_vary_32(SSA_FIXED_REGISTER(0), offset);
+                                midgard_instruction st = m_st_vary_32(SSA_FIXED_REGISTER(0), offset);
                                st.load_store.unknown = 0x1E9E; /* XXX: What is this? */
                                emit_mir_instruction(ctx, st);
                        } else {
@ -1842,7 +1842,7 @@ emit_tex(compiler_context *ctx, nir_tex_instr *instr)
                                midgard_instruction move = v_fmov(index, alu_src, SSA_FIXED_REGISTER(27));
                                emit_mir_instruction(ctx, move);

-                                midgard_instruction st = m_store_cubemap_coords(reg, 0);
+                                midgard_instruction st = m_st_cubemap_coords(reg, 0);
                                st.load_store.unknown = 0x24; /* XXX: What is this? */
                                st.load_store.mask = 0x3; /* xy? */
                                st.load_store.swizzle = alu_src.swizzle;
@ -2126,7 +2126,7 @@ install_registers(compiler_context *ctx, struct ra_graph *g)

                        case TAG_LOAD_STORE_4: {
                                if (OP_IS_STORE_VARY(ins->load_store.op)) {
-                                        /* TODO: use ssa_args for store_vary */
+                                        /* TODO: use ssa_args for st_vary */
                                        ins->load_store.reg = 0;
                                } else {
                                        bool has_dest = args.dest >= 0;
@ -2239,7 +2239,7 @@ allocate_registers(compiler_context *ctx)
                mir_foreach_instr_in_block(block, ins) {
                        if (ins->compact_branch) continue;

-                        /* Dest is < 0 for store_vary instructions, which break
+                        /* Dest is < 0 for st_vary instructions, which break
                         * the usual SSA conventions. Liveness analysis doesn't
                         * make sense on these instructions, so skip them to
                         * avoid memory corruption */
@ -3432,7 +3432,7 @@ midgard_emit_store(compiler_context *ctx, midgard_block *block) {

                midgard_instruction mov = v_fmov(idx, blank_alu_src, SSA_FIXED_REGISTER(REGISTER_VARYING_BASE + high_varying_register));

-                midgard_instruction st = m_store_vary_32(SSA_FIXED_REGISTER(high_varying_register), varying);
+                midgard_instruction st = m_st_vary_32(SSA_FIXED_REGISTER(high_varying_register), varying);
                st.load_store.unknown = 0x1E9E; /* XXX: What is this? */

                mir_insert_instruction_before(mir_next_op(ins), st);