From acba08b58f11fe6b716ea46189ae597150abc16a Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Tue, 21 Sep 2021 16:50:04 +0200 Subject: [PATCH] ir3: Implement and document ldc.k Part-of: --- src/compiler/nir/nir_intrinsics.py | 5 ++ src/freedreno/ir3/disasm-a3xx.c | 1 + src/freedreno/ir3/instr-a3xx.h | 2 + src/freedreno/ir3/ir3.h | 1 + src/freedreno/ir3/ir3_compiler_nir.c | 29 +++++++++++ src/freedreno/ir3/ir3_lexer.l | 1 + src/freedreno/ir3/ir3_parser.y | 8 ++- src/freedreno/ir3/ir3_print.c | 2 + src/freedreno/ir3/ir3_validate.c | 4 ++ src/freedreno/ir3/tests/disasm.c | 5 ++ src/freedreno/isa/ir3-cat6.xml | 78 +++++++++++++++++++--------- 11 files changed, 109 insertions(+), 27 deletions(-) diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index 1bb4c178b8b..04192896b09 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -1149,6 +1149,11 @@ barrier("preamble_end_ir3") # IR3-specific intrinsic for stc. Should be used in the shader preamble. store("uniform_ir3", [], indices=[BASE]) +# IR3-specific intrinsic for ldc.k. Copies UBO to constant file. +# base is the const file base in components, range is the amount to copy in +# vec4's. +intrinsic("copy_ubo_to_uniform_ir3", [1, 1], indices=[BASE, RANGE]) + # DXIL specific intrinsics # src[] = { value, mask, index, offset }. 
intrinsic("store_ssbo_masked_dxil", [1, 1, 1, 1]) diff --git a/src/freedreno/ir3/disasm-a3xx.c b/src/freedreno/ir3/disasm-a3xx.c index 224ef79b400..4f57000c898 100644 --- a/src/freedreno/ir3/disasm-a3xx.c +++ b/src/freedreno/ir3/disasm-a3xx.c @@ -395,6 +395,7 @@ static const struct opc_info { OPC(6, OPC_GETWID, getwid), OPC(6, OPC_GETFIBERID, getfiberid), OPC(6, OPC_STC, stc), + OPC(6, OPC_LDC_K, ldc.k), OPC(6, OPC_SPILL_MACRO, spill.macro), OPC(6, OPC_RELOAD_MACRO, reload.macro), diff --git a/src/freedreno/ir3/instr-a3xx.h b/src/freedreno/ir3/instr-a3xx.h index aee2af83740..78a65e37c4e 100644 --- a/src/freedreno/ir3/instr-a3xx.h +++ b/src/freedreno/ir3/instr-a3xx.h @@ -355,6 +355,8 @@ typedef enum { OPC_SPILL_MACRO = _OPC(6, 79), OPC_RELOAD_MACRO = _OPC(6, 80), + OPC_LDC_K = _OPC(6, 81), + /* category 7: */ OPC_BAR = _OPC(7, 0), OPC_FENCE = _OPC(7, 1), diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index ca46d3bd4ca..2d94c3a7d2b 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -2389,6 +2389,7 @@ INSTR2(QUAD_SHUFFLE_BRCST) INSTR1(QUAD_SHUFFLE_HORIZ) INSTR1(QUAD_SHUFFLE_VERT) INSTR1(QUAD_SHUFFLE_DIAG) +INSTR2NODST(LDC_K) INSTR2NODST(STC) #if GPU >= 600 INSTR3NODST(STIB); diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index ecaf8e77fb3..a3d09fdfe52 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -899,6 +899,32 @@ emit_intrinsic_load_ubo_ldc(struct ir3_context *ctx, nir_intrinsic_instr *intr, ir3_split_dest(b, dst, ldc, 0, ncomp); } /* Emits an ldc.k instruction for nir_intrinsic_copy_ubo_to_uniform_ir3. intr->src[0] is read as the index operand (idx) and intr->src[1] as the offset operand; the BASE index is turned into an address-register value that is attached to the instruction, and the RANGE index is stored in cat6.iim_val. The instruction is created through ir3_LDC_K (a no-destination builder) and is appended to the keeps array of the current block. */ +static void +emit_intrinsic_copy_ubo_to_uniform(struct ir3_context *ctx, + nir_intrinsic_instr *intr) +{ + struct ir3_block *b = ctx->block; + + unsigned base = nir_intrinsic_base(intr); + unsigned size = nir_intrinsic_range(intr); + + struct ir3_instruction *addr1 = ir3_get_addr1(ctx, base); /* BASE is not passed as a source operand; it is carried by the address value set on the instruction below. NOTE(review): presumably this is a1.x, matching the c[a1.x] destination in the ldc.k syntax - confirm in ir3_get_addr1 */ + + struct ir3_instruction *offset = ir3_get_src(ctx, &intr->src[1])[0]; + struct ir3_instruction *idx = ir3_get_src(ctx, 
&intr->src[0])[0]; + struct ir3_instruction *ldc = ir3_LDC_K(b, idx, 0, offset, 0); + ldc->cat6.iim_val = size; /* RANGE of the intrinsic: the amount to copy, in vec4 units per the intrinsic definition */ + ldc->barrier_class = ldc->barrier_conflict = IR3_BARRIER_CONST_W; /* same value used for both class and conflict; NOTE(review): presumably orders this against other const-file writers - confirm barrier semantics */ + + ir3_handle_bindless_cat6(ldc, intr->src[0]); /* may set IR3_INSTR_B on ldc; when it does, record on the shader variant that a bindless UBO is used */ + if (ldc->flags & IR3_INSTR_B) + ctx->so->bindless_ubo = true; + + ir3_instr_set_address(ldc, addr1); + /* nothing consumes a result of ldc.k, so it is registered in keeps; NOTE(review): presumably this stops it from being removed as unused - confirm */ + array_insert(b, b->keeps, ldc); +} + /* handles direct/indirect UBO reads: */ static void emit_intrinsic_load_ubo(struct ir3_context *ctx, nir_intrinsic_instr *intr, @@ -2128,6 +2154,9 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr) case nir_intrinsic_load_ubo_vec4: emit_intrinsic_load_ubo_ldc(ctx, intr, dst); break; + case nir_intrinsic_copy_ubo_to_uniform_ir3: + emit_intrinsic_copy_ubo_to_uniform(ctx, intr); + break; case nir_intrinsic_load_frag_coord: ir3_split_dest(b, dst, get_frag_coord(ctx, intr), 0, 4); break; diff --git a/src/freedreno/ir3/ir3_lexer.l b/src/freedreno/ir3/ir3_lexer.l index 181a17a90e6..52b97789645 100644 --- a/src/freedreno/ir3/ir3_lexer.l +++ b/src/freedreno/ir3/ir3_lexer.l @@ -414,6 +414,7 @@ static int parse_reg(const char *str) "p" return 'p'; "s2en" return TOKEN(T_S2EN); "s" return 's'; +"k" return 'k'; "base"[0-9]+ ir3_yylval.num = strtol(yytext+4, NULL, 10); return T_BASE; "offset"[0-9]+ ir3_yylval.num = strtol(yytext+6, NULL, 10); return T_OFFSET; "uniform" return T_UNIFORM; diff --git a/src/freedreno/ir3/ir3_parser.y b/src/freedreno/ir3/ir3_parser.y index 2aaebd91221..fd29c639da0 100644 --- a/src/freedreno/ir3/ir3_parser.y +++ b/src/freedreno/ir3/ir3_parser.y @@ -1229,8 +1229,12 @@ cat6_bindless_ibo: cat6_bindless_ibo_opc_1src cat6_typed cat6_dim cat6_type '.' cat6_bindless_ldc_opc: T_OP_LDC { new_instr(OPC_LDC); } -cat6_bindless_ldc: cat6_bindless_ldc_opc '.' T_OFFSET '.' cat6_immed '.' 
cat6_bindless_mode dst_reg ',' cat6_reg_or_immed ',' cat6_reg_or_immed { - instr->cat6.d = $3; +/* The part of an ldc between the opcode and the first comma lives in its own rule, separated from the opcode, to avoid lookahead/shift-reduce conflicts. First alternative: the regular ldc form, which stores the T_OFFSET value in cat6.d. Second alternative: the ldc.k form (size immediate, then k, then a c[...] destination using the T_A1 token), which changes the opcode of the instruction to OPC_LDC_K. */ +cat6_bindless_ldc_middle: + T_OFFSET '.' cat6_immed '.' cat6_bindless_mode dst_reg { instr->cat6.d = $1; } +| cat6_immed '.' 'k' '.' cat6_bindless_mode 'c' '[' T_A1 ']' { instr->opc = OPC_LDC_K; } + +cat6_bindless_ldc: cat6_bindless_ldc_opc '.' cat6_bindless_ldc_middle ',' cat6_reg_or_immed ',' cat6_reg_or_immed { instr->cat6.type = TYPE_U32; /* TODO cleanup ir3 src order: */ swap(instr->srcs[0], instr->srcs[1]); diff --git a/src/freedreno/ir3/ir3_print.c b/src/freedreno/ir3/ir3_print.c index ba6405382e3..e85513d3ea9 100644 --- a/src/freedreno/ir3/ir3_print.c +++ b/src/freedreno/ir3/ir3_print.c @@ -215,6 +215,8 @@ print_instr_name(struct log_stream *stream, struct ir3_instruction *instr, mesa_log_stream_printf(stream, ".a1en"); if (instr->opc == OPC_LDC) mesa_log_stream_printf(stream, ".offset%d", instr->cat6.d); + if (instr->opc == OPC_LDC_K) + mesa_log_stream_printf(stream, ".%d", instr->cat6.iim_val); if (instr->flags & IR3_INSTR_B) { mesa_log_stream_printf( stream, ".base%d", diff --git a/src/freedreno/ir3/ir3_validate.c b/src/freedreno/ir3/ir3_validate.c index 84be40ca795..9c6d5ddcd90 100644 --- a/src/freedreno/ir3/ir3_validate.c +++ b/src/freedreno/ir3/ir3_validate.c @@ -350,6 +350,10 @@ validate_instr(struct ir3_validate_ctx *ctx, struct ir3_instruction *instr) validate_reg_size(ctx, instr->srcs[0], instr->cat6.type); validate_assert(ctx, !(instr->srcs[1]->flags & IR3_REG_HALF)); break; + case OPC_LDC_K: /* handled apart from the default case, which size-checks dsts[0]; here only the two sources are checked, and both must be non-half registers */ + validate_assert(ctx, !(instr->srcs[0]->flags & IR3_REG_HALF)); + validate_assert(ctx, !(instr->srcs[1]->flags & IR3_REG_HALF)); + break; default: validate_reg_size(ctx, instr->dsts[0], instr->cat6.type); validate_assert(ctx, !(instr->srcs[0]->flags & IR3_REG_HALF)); diff --git a/src/freedreno/ir3/tests/disasm.c b/src/freedreno/ir3/tests/disasm.c index ae68ffc621c..f06e77c1929 
100644 --- a/src/freedreno/ir3/tests/disasm.c +++ b/src/freedreno/ir3/tests/disasm.c @@ -348,6 +348,11 @@ static const struct test { INSTR_6XX(c0260000_00478400, "ldc.offset2.1.imm r0.x, r0.x, 0"), /* ldc.1.mode0.base0 r0.x, r0.x, 0 */ INSTR_6XX(c0260000_00478600, "ldc.offset3.1.imm r0.x, r0.x, 0"), /* ldc.1.mode0.base0 r0.x, r0.x, 0 */ + /* dEQP-VK.glsl.conditionals.if.if_else_vertex */ + INSTR_6XX(c0360000_00c78100, "ldc.1.k.imm.base0 c[a1.x], 0, 0"), /* ldc.1.k.mode4.base0 c[a1.x], 0, 0 */ + /* custom */ + INSTR_6XX(c0360003_00c78100, "ldc.4.k.imm.base0 c[a1.x], 0, 0"), /* ldc.4.k.mode4.base0 c[a1.x], 0, 0 */ + /* dEQP-VK.glsl.struct.local.nested_struct_array_dynamic_index_fragment */ INSTR_6XX(c1425b50_01803e02, "stp.f32 p[r11.y-176], r0.y, 1"), INSTR_6XX(c1425b98_02803e14, "stp.f32 p[r11.y-104], r2.z, 2"), diff --git a/src/freedreno/isa/ir3-cat6.xml b/src/freedreno/isa/ir3-cat6.xml index 4227d558dcf..9d6048b1040 100644 --- a/src/freedreno/isa/ir3-cat6.xml +++ b/src/freedreno/isa/ir3-cat6.xml @@ -878,27 +878,8 @@ SOFTWARE. - - - LoaD Constant - UBO load - - - - {K} - - {SY}{JP}{NAME}.{TYPE_SIZE}.k.{MODE}{BASE} c[a1.x], {SRC1}, {SRC2} - - - - - - {SY}{JP}{NAME}.offset{OFFSET}.{TYPE_SIZE}.{MODE}{BASE} {DST}, {SRC1}, {SRC2} - + x - x 011110 1xx @@ -910,19 +891,62 @@ SOFTWARE. - x11 - - 1 - 0 !!(src->srcs[1]->flags & IR3_REG_IMMED) - src->cat6.d src->srcs[1] src->srcs[0] + + + ldc.k copies a series of UBO values to constants. In other + words, it acts the same as a series of ldc followed by stc. It's + also similar to a CP_LOAD_STATE with a UBO source but executed + in the shader. + + Like CP_LOAD_STATE, the UBO offset and const file offset must be + a multiple of 4 vec4's but it can load any number of vec4's. The + UBO descriptor and offset are the same as a normal ldc. The + const file offset is specified in a1.x and is in units of + components, and the number of vec4's to copy is specified in + LOAD_SIZE. 
+ + + {SY}{JP}ldc.{LOAD_SIZE}.k.{MODE}{BASE} c[a1.x], {SRC1}, {SRC2} + + + + + + xx + 11 + + + src->cat6.iim_val - 1 + + + + + + LoaD Constant - UBO load + + + + {SY}{JP}{NAME}.offset{OFFSET}.{TYPE_SIZE}.{MODE}{BASE} {DST}, {SRC1}, {SRC2} + + + + 10 + + src->cat6.d + + + GET Shader Processor ID? @@ -1135,6 +1159,10 @@ SOFTWARE. {TYPE_SIZE_MINUS_ONE} + 1 + + {LOAD_SIZE_MINUS_ONE} + 1 + + {TYPED}