asahi: split CDM Launch words

Similarly, separate the register counts from the USC words.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29247>
This commit is contained in:
Alyssa Rosenzweig 2024-04-22 19:48:43 -04:00
parent 5fbd8bb694
commit a173c2e38c
3 changed files with 29 additions and 17 deletions

View File

@ -1010,14 +1010,17 @@
<value name="Indirect local" value="2"/>
</enum>
<struct name="CDM Launch" size="8">
<struct name="CDM Launch Word 0" size="4">
<field name="Uniform register count" size="3" start="1" type="uint" modifier="groups(64)"/>
<field name="Texture state register count" size="5" start="4" type="uint" modifier="groups(8)"/>
<field name="Sampler state register count" size="3" start="9" type="Sampler states"/>
<field name="Preshader register count" size="4" start="12" type="uint" modifier="groups(16)"/>
<field name="Mode" size="2" start="27" type="CDM Mode"/>
<field name="Block Type" size="3" start="29" type="CDM Block Type" default="Launch"/>
<field name="Pipeline" size="26" start="1:6" type="address" modifier="shr(6)"/>
</struct>
<!--
  Second word of the CDM launch command. It carries only the pipeline
  address, which previously lived at word 1, bit 6 of the combined 8-byte
  "CDM Launch" struct. The field is 26 bits wide starting at bit 6 and uses
  the shr(6) modifier (presumably the address is stored right-shifted by 6,
  i.e. 64-byte aligned; confirm against the genxml packer).
-->
<struct name="CDM Launch Word 1" size="4">
<field name="Pipeline" size="26" start="6" type="address" modifier="shr(6)"/>
</struct>
<struct name="CDM Unk G14X" size="8">

View File

@ -665,7 +665,8 @@ agxdecode_cdm(struct agxdecode_ctx *ctx, const uint8_t *map, uint64_t *link,
switch (block_type) {
case AGX_CDM_BLOCK_TYPE_LAUNCH: {
size_t length = AGX_CDM_LAUNCH_LENGTH;
size_t length =
AGX_CDM_LAUNCH_WORD_0_LENGTH + AGX_CDM_LAUNCH_WORD_1_LENGTH;
#define CDM_PRINT(STRUCT_NAME, human) \
do { \
@ -674,17 +675,20 @@ agxdecode_cdm(struct agxdecode_ctx *ctx, const uint8_t *map, uint64_t *link,
length += AGX_CDM_##STRUCT_NAME##_LENGTH; \
} while (0);
agx_unpack(agxdecode_dump_stream, map, CDM_LAUNCH, hdr);
agxdecode_stateful(ctx, hdr.pipeline, "Pipeline", agxdecode_usc, verbose,
params, &hdr.sampler_state_register_count);
DUMP_UNPACKED(CDM_LAUNCH, hdr, "Compute\n");
map += AGX_CDM_LAUNCH_LENGTH;
agx_unpack(agxdecode_dump_stream, map + 0, CDM_LAUNCH_WORD_0, hdr0);
agx_unpack(agxdecode_dump_stream, map + 4, CDM_LAUNCH_WORD_1, hdr1);
agxdecode_stateful(ctx, hdr1.pipeline, "Pipeline", agxdecode_usc, verbose,
params, &hdr0.sampler_state_register_count);
DUMP_UNPACKED(CDM_LAUNCH_WORD_0, hdr0, "Compute\n");
DUMP_UNPACKED(CDM_LAUNCH_WORD_1, hdr1, "Compute\n");
map += 8;
/* Added in G14X */
if (params->gpu_generation >= 14 && params->num_clusters_total > 1)
CDM_PRINT(UNK_G14X, "Unknown G14X");
switch (hdr.mode) {
switch (hdr0.mode) {
case AGX_CDM_MODE_DIRECT:
CDM_PRINT(GLOBAL_SIZE, "Global size");
CDM_PRINT(LOCAL_SIZE, "Local size");
@ -697,7 +701,7 @@ agxdecode_cdm(struct agxdecode_ctx *ctx, const uint8_t *map, uint64_t *link,
CDM_PRINT(INDIRECT, "Indirect buffer");
break;
default:
fprintf(agxdecode_dump_stream, "Unknown CDM mode: %u\n", hdr.mode);
fprintf(agxdecode_dump_stream, "Unknown CDM mode: %u\n", hdr0.mode);
break;
}

View File

@ -4037,9 +4037,10 @@ agx_launch_gs_prerast(struct agx_batch *batch,
agx_ensure_cmdbuf_has_space(
batch, &batch->cdm,
8 * (AGX_CDM_LAUNCH_LENGTH + AGX_CDM_UNK_G14X_LENGTH +
AGX_CDM_INDIRECT_LENGTH + AGX_CDM_GLOBAL_SIZE_LENGTH +
AGX_CDM_LOCAL_SIZE_LENGTH + AGX_CDM_BARRIER_LENGTH));
8 * (AGX_CDM_LAUNCH_WORD_0_LENGTH + AGX_CDM_LAUNCH_WORD_1_LENGTH +
AGX_CDM_UNK_G14X_LENGTH + AGX_CDM_INDIRECT_LENGTH +
AGX_CDM_GLOBAL_SIZE_LENGTH + AGX_CDM_LOCAL_SIZE_LENGTH +
AGX_CDM_BARRIER_LENGTH));
assert(!info->primitive_restart && "should have been lowered");
@ -5216,7 +5217,7 @@ agx_launch(struct agx_batch *batch, const struct pipe_grid_info *info,
/* TODO: Ensure space if we allow multiple kernels in a batch */
uint8_t *out = batch->cdm.current;
agx_push(out, CDM_LAUNCH, cfg) {
agx_push(out, CDM_LAUNCH_WORD_0, cfg) {
if (info->indirect)
cfg.mode = AGX_CDM_MODE_INDIRECT_GLOBAL;
else
@ -5227,6 +5228,9 @@ agx_launch(struct agx_batch *batch, const struct pipe_grid_info *info,
cfg.texture_state_register_count = agx_nr_tex_descriptors(batch, cs);
cfg.sampler_state_register_count =
translate_sampler_state_count(ctx, cs, stage);
}
agx_push(out, CDM_LAUNCH_WORD_1, cfg) {
cfg.pipeline =
agx_build_pipeline(batch, cs, linked, PIPE_SHADER_COMPUTE,
info->variable_shared_mem, subgroups_per_core);
@ -5367,9 +5371,10 @@ agx_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
* in practice, we can use CDM stream links.
*/
size_t dispatch_upper_bound =
AGX_CDM_LAUNCH_LENGTH + AGX_CDM_UNK_G14X_LENGTH +
AGX_CDM_INDIRECT_LENGTH + AGX_CDM_GLOBAL_SIZE_LENGTH +
AGX_CDM_LOCAL_SIZE_LENGTH + AGX_CDM_BARRIER_LENGTH;
AGX_CDM_LAUNCH_WORD_0_LENGTH + AGX_CDM_LAUNCH_WORD_1_LENGTH +
AGX_CDM_UNK_G14X_LENGTH + AGX_CDM_INDIRECT_LENGTH +
AGX_CDM_GLOBAL_SIZE_LENGTH + AGX_CDM_LOCAL_SIZE_LENGTH +
AGX_CDM_BARRIER_LENGTH;
if (batch->cdm.current + dispatch_upper_bound >= batch->cdm.end)
agx_flush_batch_for_reason(ctx, batch, "CDM overfull");