pan/midgard: Report byte masks for read components

Read component masks don't have a particular type associated with them, since the type of the ALU operation may not match the type of the operands in question. So let's generate byte masks instead, and update the rest of the compiler to use byte masks when analyzing reads. This is preparation for mixed types.

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
2019-10-16 12:25:32 -04:00 · 2019-10-16 12:25:32 -04:00 · e9202ff3cb
parent d079631248
commit e9202ff3cb
6 changed files with 31 additions and 31 deletions
--- a/src/panfrost/midgard/compiler.h
+++ b/src/panfrost/midgard/compiler.h
@ -512,7 +512,7 @@ bool mir_single_use(compiler_context *ctx, unsigned value);
 bool mir_special_index(compiler_context *ctx, unsigned idx);
 unsigned mir_use_count(compiler_context *ctx, unsigned value);
 bool mir_is_written_before(compiler_context *ctx, midgard_instruction *ins, unsigned node);
-unsigned mir_mask_of_read_components(midgard_instruction *ins, unsigned node);
+uint16_t mir_bytemask_of_read_components(midgard_instruction *ins, unsigned node);
 unsigned mir_ubo_shift(midgard_load_store_op op);
 midgard_reg_mode mir_typesize(midgard_instruction *ins);
 uint16_t mir_from_bytemask(uint16_t bytemask, midgard_reg_mode mode);
--- a/src/panfrost/midgard/midgard_liveness.c
+++ b/src/panfrost/midgard/midgard_liveness.c
@ -64,7 +64,8 @@ mir_liveness_ins_update(uint16_t *live, midgard_instruction *ins, unsigned max)

        mir_foreach_src(ins, src) {
                unsigned node = ins->src[src];
-                unsigned mask = mir_mask_of_read_components(ins, node);
+                unsigned bytemask = mir_bytemask_of_read_components(ins, node);
+                unsigned mask = mir_from_bytemask(bytemask, midgard_reg_mode_32);

                liveness_gen(live, node, max, mask);
        }
--- a/src/panfrost/midgard/midgard_ra.c
+++ b/src/panfrost/midgard/midgard_ra.c
@ -525,7 +525,7 @@ mir_lower_special_reads(compiler_context *ctx)
                                } else {
                                        idx = spill_idx++;
                                        m = v_mov(i, blank_alu_src, idx);
-                                        m.mask = mir_mask_of_read_components(pre_use, i);
+                                        m.mask = mir_from_bytemask(mir_bytemask_of_read_components(pre_use, i), midgard_reg_mode_32);
                                        mir_insert_instruction_before(ctx, pre_use, m);
                                        mir_rewrite_index_src_single(pre_use, i, idx);
                                }
--- a/src/panfrost/midgard/midgard_ra_pipeline.c
+++ b/src/panfrost/midgard/midgard_ra_pipeline.c
@ -54,11 +54,11 @@ mir_pipeline_ins(
        unsigned node = ins->dest;
        unsigned read_mask = 0;

-        /* Analyze the bundle for a read mask */
+        /* Analyze the bundle for a per-byte read mask */

        for (unsigned i = 0; i < bundle->instruction_count; ++i) {
                midgard_instruction *q = bundle->instructions[i];
-                read_mask |= mir_mask_of_read_components(q, node);
+                read_mask |= mir_bytemask_of_read_components(q, node);

                /* The fragment colour can't be pipelined (well, it is
                 * pipelined in r0, but this is a delicate dance with
@ -74,7 +74,7 @@ mir_pipeline_ins(
                if (q->dest != node) continue;

                /* Remove the written mask from the read requirements */
-                read_mask &= ~q->mask;
+                read_mask &= ~mir_bytemask(q);
        }

        /* Check for leftovers */
--- a/src/panfrost/midgard/midgard_schedule.c
+++ b/src/panfrost/midgard/midgard_schedule.c
@ -125,7 +125,7 @@ mir_create_dependency_graph(midgard_instruction **instructions, unsigned count,
                        unsigned src = instructions[i]->src[s];

                        if (src < node_count) {
-                                unsigned readmask = mir_mask_of_read_components(instructions[i], src);
+                                unsigned readmask = mir_from_bytemask(mir_bytemask_of_read_components(instructions[i], src), midgard_reg_mode_32);
                                add_dependency(last_write, src, readmask, instructions, i);
                        }
                }
@ -140,7 +140,7 @@ mir_create_dependency_graph(midgard_instruction **instructions, unsigned count,
                        unsigned src = instructions[i]->src[s];

                        if (src < node_count) {
-                                unsigned readmask = mir_mask_of_read_components(instructions[i], src);
+                                unsigned readmask = mir_from_bytemask(mir_bytemask_of_read_components(instructions[i], src), midgard_reg_mode_32);
                                mark_access(last_read, src, readmask, i);
                        }
                }
@ -388,7 +388,7 @@ mir_adjust_constants(midgard_instruction *ins,
                uint32_t *bundles = (uint32_t *) pred->constants;
                uint32_t *constants = (uint32_t *) ins->constants;
                unsigned r_constant = SSA_FIXED_REGISTER(REGISTER_CONSTANT);
-                unsigned mask = mir_mask_of_read_components(ins, r_constant);
+                unsigned mask = mir_from_bytemask(mir_bytemask_of_read_components(ins, r_constant), midgard_reg_mode_32);

                /* First, check if it fits */
                unsigned count = DIV_ROUND_UP(pred->constant_count, sizeof(uint32_t));
@ -1290,11 +1290,11 @@ static void mir_spill_register(
                }
        }

-        /* For special reads, figure out how many components we need */
-        unsigned read_mask = 0;
+        /* For special reads, figure out how many bytes we need */
+        unsigned read_bytemask = 0;

        mir_foreach_instr_global_safe(ctx, ins) {
-                read_mask |= mir_mask_of_read_components(ins, spill_node);
+                read_bytemask |= mir_bytemask_of_read_components(ins, spill_node);
        }

        /* Insert a load from TLS before the first consecutive
@ -1349,7 +1349,7 @@ static void mir_spill_register(
                                /* Mask the load based on the component count
                                 * actually needed to prevent RA loops */

-                                st.mask = read_mask;
+                                st.mask = mir_from_bytemask(read_bytemask, midgard_reg_mode_32);

                                mir_insert_instruction_before_scheduled(ctx, block, before, st);
                               // consecutive_skip = true;
--- a/src/panfrost/midgard/mir.c
+++ b/src/panfrost/midgard/mir.c
@ -533,19 +533,19 @@ mir_bytemask(midgard_instruction *ins)
 * will return a mask of Z/Y for r2
 */

-static unsigned
-mir_mask_of_read_components_single(unsigned swizzle, unsigned outmask)
+static uint16_t
+mir_bytemask_of_read_components_single(unsigned swizzle, unsigned inmask, midgard_reg_mode mode)
 {
-        unsigned mask = 0;
+        unsigned cmask = 0;

        for (unsigned c = 0; c < 4; ++c) {
-                if (!(outmask & (1 << c))) continue;
+                if (!(inmask & (1 << c))) continue;

                unsigned comp = (swizzle >> (2*c)) & 3;
-                mask |= (1 << comp);
+                cmask |= (1 << comp);
        }

-        return mask;
+        return mir_to_bytemask(mode, cmask);
 }

 static unsigned
@ -565,40 +565,39 @@ mir_source_count(midgard_instruction *ins)
        }
 }

-unsigned
-mir_mask_of_read_components(midgard_instruction *ins, unsigned node)
+uint16_t
+mir_bytemask_of_read_components(midgard_instruction *ins, unsigned node)
 {
-        unsigned mask = 0;
+        uint16_t mask = 0;

        for (unsigned i = 0; i < mir_source_count(ins); ++i) {
                if (ins->src[i] != node) continue;

                /* Branch writeout uses all components */
                if (ins->compact_branch && ins->writeout && (i == 0))
-                        return 0xF;
+                        return 0xFFFF;

-                /* Conditional branches read one component (TODO: multi branch??) */
+                /* Conditional branches read one 32-bit component = 4 bytes (TODO: multi branch??) */
                if (ins->compact_branch && !ins->prepacked_branch && ins->branch.conditional && (i == 0))
-                        return 0x1;
+                        return 0xF;

                /* ALU ops act componentwise so we need to pay attention to
                 * their mask. Texture/ldst does not so we don't clamp source
                 * readmasks based on the writemask */
-                unsigned qmask = (ins->type == TAG_ALU_4) ? ins->mask : 0xF;
+                unsigned qmask = (ins->type == TAG_ALU_4) ? ins->mask : ~0;

                /* Handle dot products and things */
                if (ins->type == TAG_ALU_4 && !ins->compact_branch) {
-                        unsigned channel_override =
-                                GET_CHANNEL_COUNT(alu_opcode_props[ins->alu.op].props);
+                        unsigned props = alu_opcode_props[ins->alu.op].props;
+
+                        unsigned channel_override = GET_CHANNEL_COUNT(props);

                        if (channel_override)
                                qmask = mask_of(channel_override);
                }

                unsigned swizzle = mir_get_swizzle(ins, i);
-                unsigned m = mir_mask_of_read_components_single(swizzle, qmask);
-
-               mask |= m;
+                mask |= mir_bytemask_of_read_components_single(swizzle, qmask, mir_srcsize(ins, i));
        }

        return mask;