ir3: Implement and document ldc.k

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13148>
This commit is contained in:
Connor Abbott 2021-09-21 16:50:04 +02:00 committed by Marge Bot
parent fccc35c2de
commit acba08b58f
11 changed files with 109 additions and 27 deletions

View File

@ -1149,6 +1149,11 @@ barrier("preamble_end_ir3")
# IR3-specific intrinsic for stc. Should be used in the shader preamble.
store("uniform_ir3", [], indices=[BASE])
# IR3-specific intrinsic for ldc.k. Copies UBO data to the constant file.
# src[] = { ubo_index, offset }.
# base is the const file base in components, range is the amount to copy in
# vec4's.
intrinsic("copy_ubo_to_uniform_ir3", [1, 1], indices=[BASE, RANGE])
# DXIL specific intrinsics
# src[] = { value, mask, index, offset }.
intrinsic("store_ssbo_masked_dxil", [1, 1, 1, 1])

View File

@ -395,6 +395,7 @@ static const struct opc_info {
OPC(6, OPC_GETWID, getwid),
OPC(6, OPC_GETFIBERID, getfiberid),
OPC(6, OPC_STC, stc),
OPC(6, OPC_LDC_K, ldc.k),
OPC(6, OPC_SPILL_MACRO, spill.macro),
OPC(6, OPC_RELOAD_MACRO, reload.macro),

View File

@ -355,6 +355,8 @@ typedef enum {
OPC_SPILL_MACRO = _OPC(6, 79),
OPC_RELOAD_MACRO = _OPC(6, 80),
OPC_LDC_K = _OPC(6, 81),
/* category 7: */
OPC_BAR = _OPC(7, 0),
OPC_FENCE = _OPC(7, 1),

View File

@ -2389,6 +2389,7 @@ INSTR2(QUAD_SHUFFLE_BRCST)
INSTR1(QUAD_SHUFFLE_HORIZ)
INSTR1(QUAD_SHUFFLE_VERT)
INSTR1(QUAD_SHUFFLE_DIAG)
INSTR2NODST(LDC_K)
INSTR2NODST(STC)
#if GPU >= 600
INSTR3NODST(STIB);

View File

@ -899,6 +899,32 @@ emit_intrinsic_load_ubo_ldc(struct ir3_context *ctx, nir_intrinsic_instr *intr,
ir3_split_dest(b, dst, ldc, 0, ncomp);
}
/*
 * Lower nir_intrinsic_copy_ubo_to_uniform_ir3 to a single ldc.k.
 *
 * src[0] supplies the UBO index and src[1] the offset within it.  The
 * intrinsic's BASE is handed to ir3_get_addr1() and the result is attached
 * as the instruction's address; RANGE is stored in cat6.iim_val as the
 * size of the copy.
 */
static void
emit_intrinsic_copy_ubo_to_uniform(struct ir3_context *ctx,
                                   nir_intrinsic_instr *intr)
{
   struct ir3_block *block = ctx->block;
   const unsigned const_base = nir_intrinsic_base(intr);
   const unsigned copy_size = nir_intrinsic_range(intr);

   struct ir3_instruction *a1 = ir3_get_addr1(ctx, const_base);

   struct ir3_instruction *ubo_offset = ir3_get_src(ctx, &intr->src[1])[0];
   struct ir3_instruction *ubo_index = ir3_get_src(ctx, &intr->src[0])[0];

   struct ir3_instruction *copy =
      ir3_LDC_K(block, ubo_index, 0, ubo_offset, 0);
   copy->cat6.iim_val = copy_size;

   /* Both barrier fields are tagged as a const-file write. */
   copy->barrier_conflict = IR3_BARRIER_CONST_W;
   copy->barrier_class = copy->barrier_conflict;

   /* Note on the shader variant when the UBO access ended up bindless. */
   ir3_handle_bindless_cat6(copy, intr->src[0]);
   if (copy->flags & IR3_INSTR_B) {
      ctx->so->bindless_ubo = true;
   }

   ir3_instr_set_address(copy, a1);

   /* ldc.k is built without a destination register; record it in the
    * block's keeps list (presumably so it is not discarded as unused).
    */
   array_insert(block, block->keeps, copy);
}
/* handles direct/indirect UBO reads: */
static void
emit_intrinsic_load_ubo(struct ir3_context *ctx, nir_intrinsic_instr *intr,
@ -2128,6 +2154,9 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
case nir_intrinsic_load_ubo_vec4:
emit_intrinsic_load_ubo_ldc(ctx, intr, dst);
break;
case nir_intrinsic_copy_ubo_to_uniform_ir3:
emit_intrinsic_copy_ubo_to_uniform(ctx, intr);
break;
case nir_intrinsic_load_frag_coord:
ir3_split_dest(b, dst, get_frag_coord(ctx, intr), 0, 4);
break;

View File

@ -414,6 +414,7 @@ static int parse_reg(const char *str)
"p" return 'p';
"s2en" return TOKEN(T_S2EN);
"s" return 's';
"k" return 'k';
"base"[0-9]+ ir3_yylval.num = strtol(yytext+4, NULL, 10); return T_BASE;
"offset"[0-9]+ ir3_yylval.num = strtol(yytext+6, NULL, 10); return T_OFFSET;
"uniform" return T_UNIFORM;

View File

@ -1229,8 +1229,12 @@ cat6_bindless_ibo: cat6_bindless_ibo_opc_1src cat6_typed cat6_dim cat6_type '.'
cat6_bindless_ldc_opc: T_OP_LDC { new_instr(OPC_LDC); }
cat6_bindless_ldc: cat6_bindless_ldc_opc '.' T_OFFSET '.' cat6_immed '.' cat6_bindless_mode dst_reg ',' cat6_reg_or_immed ',' cat6_reg_or_immed {
instr->cat6.d = $3;
/* This is separated from the opcode to avoid lookahead/shift-reduce conflicts */
cat6_bindless_ldc_middle:
T_OFFSET '.' cat6_immed '.' cat6_bindless_mode dst_reg { instr->cat6.d = $1; }
| cat6_immed '.' 'k' '.' cat6_bindless_mode 'c' '[' T_A1 ']' { instr->opc = OPC_LDC_K; }
cat6_bindless_ldc: cat6_bindless_ldc_opc '.' cat6_bindless_ldc_middle ',' cat6_reg_or_immed ',' cat6_reg_or_immed {
instr->cat6.type = TYPE_U32;
/* TODO cleanup ir3 src order: */
swap(instr->srcs[0], instr->srcs[1]);

View File

@ -215,6 +215,8 @@ print_instr_name(struct log_stream *stream, struct ir3_instruction *instr,
mesa_log_stream_printf(stream, ".a1en");
if (instr->opc == OPC_LDC)
mesa_log_stream_printf(stream, ".offset%d", instr->cat6.d);
if (instr->opc == OPC_LDC_K)
mesa_log_stream_printf(stream, ".%d", instr->cat6.iim_val);
if (instr->flags & IR3_INSTR_B) {
mesa_log_stream_printf(
stream, ".base%d",

View File

@ -350,6 +350,10 @@ validate_instr(struct ir3_validate_ctx *ctx, struct ir3_instruction *instr)
validate_reg_size(ctx, instr->srcs[0], instr->cat6.type);
validate_assert(ctx, !(instr->srcs[1]->flags & IR3_REG_HALF));
break;
case OPC_LDC_K:
validate_assert(ctx, !(instr->srcs[0]->flags & IR3_REG_HALF));
validate_assert(ctx, !(instr->srcs[1]->flags & IR3_REG_HALF));
break;
default:
validate_reg_size(ctx, instr->dsts[0], instr->cat6.type);
validate_assert(ctx, !(instr->srcs[0]->flags & IR3_REG_HALF));

View File

@ -348,6 +348,11 @@ static const struct test {
INSTR_6XX(c0260000_00478400, "ldc.offset2.1.imm r0.x, r0.x, 0"), /* ldc.1.mode0.base0 r0.x, r0.x, 0 */
INSTR_6XX(c0260000_00478600, "ldc.offset3.1.imm r0.x, r0.x, 0"), /* ldc.1.mode0.base0 r0.x, r0.x, 0 */
/* dEQP-VK.glsl.conditionals.if.if_else_vertex */
INSTR_6XX(c0360000_00c78100, "ldc.1.k.imm.base0 c[a1.x], 0, 0"), /* ldc.1.k.mode4.base0 c[a1.x], 0, 0 */
/* custom */
INSTR_6XX(c0360003_00c78100, "ldc.4.k.imm.base0 c[a1.x], 0, 0"), /* ldc.4.k.mode4.base0 c[a1.x], 0, 0 */
/* dEQP-VK.glsl.struct.local.nested_struct_array_dynamic_index_fragment */
INSTR_6XX(c1425b50_01803e02, "stp.f32 p[r11.y-176], r0.y, 1"),
INSTR_6XX(c1425b98_02803e14, "stp.f32 p[r11.y-104], r2.z, 2"),

View File

@ -878,27 +878,8 @@ SOFTWARE.
</encode>
</bitset>
<bitset name="ldc" extends="#instruction-cat6-a6xx">
<doc>
LoaD Constant - UBO load
</doc>
<override>
<!-- TODO.. wtf? -->
<expr>{K}</expr>
<display>
{SY}{JP}{NAME}.{TYPE_SIZE}.k.{MODE}{BASE} c[a1.x], {SRC1}, {SRC2}
</display>
<field low="32" high="39" name="TYPE_SIZE_MINUS_ONE" type="uint"/>
</override>
<!--
TODO are these *really* all bindless? Or does that bit have a different
meaning? Maybe I don't have enough ldc examples from deqp-glesN
-->
<display>
{SY}{JP}{NAME}.offset{OFFSET}.{TYPE_SIZE}.{MODE}{BASE} {DST}, {SRC1}, {SRC2}
</display>
<bitset name="#cat6-ldc-common" extends="#instruction-cat6-a6xx">
<pattern pos="0" >x</pattern>
<field low="9" high="10" name="OFFSET" type="uint"/> <!-- D_MINUS_ONE -->
<pattern pos="11" >x</pattern> <!-- TYPED -->
<pattern low="14" high="19">011110</pattern> <!-- OPC -->
<pattern low="20" high="22">1xx</pattern>
@ -910,19 +891,62 @@ SOFTWARE.
<field low="24" high="31" name="SRC1" type="#cat6-src">
<param name="SRC1_IM" as="SRC_IM"/>
</field>
<field low="32" high="39" name="DST" type="#reg-gpr"/>
<pattern low="49" high="51">x11</pattern> <!-- TYPE -->
<field pos="52" name="K" type="bool"/>
<pattern pos="53" >1</pattern>
<encode>
<map name="K">0</map> <!-- TODO.. once we figure out what this is -->
<map name="SRC1_IM">!!(src->srcs[1]->flags &amp; IR3_REG_IMMED)</map>
<map name="OFFSET">src->cat6.d</map>
<map name="SRC1">src->srcs[1]</map>
<map name="SRC2">src->srcs[0]</map>
</encode>
</bitset>
<bitset name="ldc.k" extends="#cat6-ldc-common">
<doc>
ldc.k copies a series of UBO values to constants. In other
words, it acts the same as a series of ldc followed by stc. It's
also similar to a CP_LOAD_STATE with a UBO source but executed
in the shader.
Like CP_LOAD_STATE, the UBO offset and const file offset must be
a multiple of 4 vec4's but it can load any number of vec4's. The
UBO descriptor and offset are the same as a normal ldc. The
const file offset is specified in a1.x and is in units of
components, and the number of vec4's to copy is specified in
LOAD_SIZE.
</doc>
<display>
{SY}{JP}ldc.{LOAD_SIZE}.k.{MODE}{BASE} c[a1.x], {SRC1}, {SRC2}
</display>
<!--
LOAD_SIZE is the vec4 count shown in the disassembly. The instruction
stores it biased by one in bits 32..39, the same bits plain ldc uses
for its DST register.
-->
<derived name="LOAD_SIZE" expr="#cat6-load-size" type="uint"/>
<field low="32" high="39" name="LOAD_SIZE_MINUS_ONE" type="uint"/>
<pattern low="9" high="10">xx</pattern> <!-- D_MINUS_ONE -->
<!-- Bits 52..53 tell ldc.k (11) apart from plain ldc (10). -->
<pattern low="52" high="53">11</pattern>
<encode>
<!-- cat6.iim_val carries the number of vec4s to copy. -->
<map name="LOAD_SIZE_MINUS_ONE">src->cat6.iim_val - 1</map>
</encode>
</bitset>
<bitset name="ldc" extends="#cat6-ldc-common">
<doc>
LoaD Constant - UBO load
</doc>
<!--
TODO are these *really* all bindless? Or does that bit have a different
meaning? Maybe I don't have enough ldc examples from deqp-glesN
-->
<display>
{SY}{JP}{NAME}.offset{OFFSET}.{TYPE_SIZE}.{MODE}{BASE} {DST}, {SRC1}, {SRC2}
</display>
<field low="9" high="10" name="OFFSET" type="uint"/> <!-- D_MINUS_ONE -->
<field low="32" high="39" name="DST" type="#reg-gpr"/>
<!-- Bit 52 clear distinguishes this from ldc.k, whose pattern here is 11. -->
<pattern low="52" high="53">10</pattern>
<encode>
<!-- cat6.d is what the disassembler prints as the .offsetN suffix. -->
<map name="OFFSET">src->cat6.d</map>
</encode>
</bitset>
<bitset name="getspid" extends="#instruction-cat6-a6xx">
<doc>
GET Shader Processor ID?
@ -1135,6 +1159,10 @@ SOFTWARE.
{TYPE_SIZE_MINUS_ONE} + 1
</expr>
<!-- Vec4 count displayed for ldc.k: the LOAD_SIZE_MINUS_ONE field plus one. -->
<expr name="#cat6-load-size">
{LOAD_SIZE_MINUS_ONE} + 1
</expr>
<bitset name="#cat6-typed" size="1">
<override>
<expr>{TYPED}</expr>