panfrost: XMLify invocations
Not so bad :) Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6476>
This commit is contained in:
parent
0225ed4d72
commit
02e768e6a9
|
@ -105,6 +105,7 @@ panfrost_launch_grid(struct pipe_context *pipe,
|
|||
|
||||
/* TODO: Stub */
|
||||
struct midgard_payload_vertex_tiler payload = { 0 };
|
||||
struct mali_invocation_packed invocation;
|
||||
struct mali_draw_packed postfix;
|
||||
|
||||
/* We implement OpenCL inputs as uniforms (or a UBO -- same thing), so
|
||||
|
@ -139,12 +140,13 @@ panfrost_launch_grid(struct pipe_context *pipe,
|
|||
|
||||
/* Invoke according to the grid info */
|
||||
|
||||
panfrost_pack_work_groups_compute(&payload.prefix,
|
||||
panfrost_pack_work_groups_compute(&invocation,
|
||||
info->grid[0], info->grid[1],
|
||||
info->grid[2],
|
||||
info->block[0], info->block[1],
|
||||
info->block[2],
|
||||
false);
|
||||
payload.prefix.invocation = invocation;
|
||||
|
||||
panfrost_new_job(&batch->pool, &batch->scoreboard,
|
||||
MALI_JOB_TYPE_COMPUTE, true, 0, &payload,
|
||||
|
|
|
@ -327,6 +327,7 @@ panfrost_draw_vbo(
|
|||
struct mali_vertex_tiler_prefix vertex_prefix = { 0 }, tiler_prefix = { 0 };
|
||||
struct mali_draw_packed vertex_postfix, tiler_postfix;
|
||||
struct mali_primitive_packed primitive;
|
||||
struct mali_invocation_packed invocation;
|
||||
union midgard_primitive_size primitive_size;
|
||||
unsigned vertex_count = ctx->vertex_count;
|
||||
|
||||
|
@ -373,9 +374,12 @@ panfrost_draw_vbo(
|
|||
|
||||
panfrost_statistics_record(ctx, info);
|
||||
|
||||
panfrost_pack_work_groups_fused(&vertex_prefix, &tiler_prefix,
|
||||
panfrost_pack_work_groups_compute(&invocation,
|
||||
1, vertex_count, info->instance_count,
|
||||
1, 1, 1);
|
||||
1, 1, 1, true);
|
||||
|
||||
vertex_prefix.invocation = invocation;
|
||||
tiler_prefix.invocation = invocation;
|
||||
|
||||
/* Emit all sorts of descriptors. */
|
||||
mali_ptr varyings = 0, vs_vary = 0, fs_vary = 0, pos = 0, psiz = 0;
|
||||
|
|
|
@ -199,11 +199,15 @@ bit_vertex(struct panfrost_device *dev, panfrost_program prog,
|
|||
},
|
||||
};
|
||||
|
||||
panfrost_pack_work_groups_compute(&payload.prefix,
|
||||
struct mali_invocation_packed invocation;
|
||||
|
||||
panfrost_pack_work_groups_compute(&invocation,
|
||||
1, 1, 1,
|
||||
1, 1, 1,
|
||||
true);
|
||||
|
||||
payload.prefix.invocation = invocation;
|
||||
|
||||
struct panfrost_bo *bos[] = {
|
||||
scratchpad, shmem, shader, shader_desc, ubo, var, attr
|
||||
};
|
||||
|
|
|
@ -421,42 +421,7 @@ struct mali_payload_write_value {
|
|||
*/
|
||||
|
||||
struct mali_vertex_tiler_prefix {
|
||||
/* This is a dynamic bitfield containing the following things in this order:
|
||||
*
|
||||
* - gl_WorkGroupSize.x
|
||||
* - gl_WorkGroupSize.y
|
||||
* - gl_WorkGroupSize.z
|
||||
* - gl_NumWorkGroups.x
|
||||
* - gl_NumWorkGroups.y
|
||||
* - gl_NumWorkGroups.z
|
||||
*
|
||||
* The number of bits allocated for each number is based on the *_shift
|
||||
* fields below. For example, workgroups_y_shift gives the bit that
|
||||
* gl_NumWorkGroups.y starts at, and workgroups_z_shift gives the bit
|
||||
* that gl_NumWorkGroups.z starts at (and therefore one after the bit
|
||||
* that gl_NumWorkGroups.y ends at). The actual value for each gl_*
|
||||
* value is one more than the stored value, since if any of the values
|
||||
* are zero, then there would be no invocations (and hence no job). If
|
||||
* there were 0 bits allocated to a given field, then it must be zero,
|
||||
* and hence the real value is one.
|
||||
*
|
||||
* Vertex jobs reuse the same job dispatch mechanism as compute jobs,
|
||||
* effectively doing glDispatchCompute(1, vertex_count, instance_count)
|
||||
* where vertex count is the number of vertices.
|
||||
*/
|
||||
u32 invocation_count;
|
||||
|
||||
/* Bitfield for shifts:
|
||||
*
|
||||
* size_y_shift : 5
|
||||
* size_z_shift : 5
|
||||
* workgroups_x_shift : 6
|
||||
* workgroups_y_shift : 6
|
||||
* workgroups_z_shift : 6
|
||||
* workgroups_x_shift_2 : 4
|
||||
*/
|
||||
u32 invocation_shifts;
|
||||
|
||||
struct mali_invocation_packed invocation;
|
||||
struct mali_primitive_packed primitive;
|
||||
} __attribute__((packed));
|
||||
|
||||
|
|
|
@ -1248,21 +1248,17 @@ pandecode_vertex_tiler_prefix(struct mali_vertex_tiler_prefix *p, int job_no, bo
|
|||
/* Decode invocation_count. See the comment before the definition of
|
||||
* invocation_count for an explanation.
|
||||
*/
|
||||
struct MALI_INVOCATION invocation;
|
||||
struct mali_invocation_packed invocation_packed = p->invocation;
|
||||
MALI_INVOCATION_unpack((const uint8_t *) &invocation_packed, &invocation);
|
||||
|
||||
unsigned size_y_shift = bits(p->invocation_shifts, 0, 5);
|
||||
unsigned size_z_shift = bits(p->invocation_shifts, 5, 10);
|
||||
unsigned workgroups_x_shift = bits(p->invocation_shifts, 10, 16);
|
||||
unsigned workgroups_y_shift = bits(p->invocation_shifts, 16, 22);
|
||||
unsigned workgroups_z_shift = bits(p->invocation_shifts, 22, 28);
|
||||
unsigned workgroups_x_shift_2 = bits(p->invocation_shifts, 28, 32);
|
||||
unsigned size_x = bits(invocation.invocations, 0, invocation.size_y_shift) + 1;
|
||||
unsigned size_y = bits(invocation.invocations, invocation.size_y_shift, invocation.size_z_shift) + 1;
|
||||
unsigned size_z = bits(invocation.invocations, invocation.size_z_shift, invocation.workgroups_x_shift) + 1;
|
||||
|
||||
unsigned size_x = bits(p->invocation_count, 0, size_y_shift) + 1;
|
||||
unsigned size_y = bits(p->invocation_count, size_y_shift, size_z_shift) + 1;
|
||||
unsigned size_z = bits(p->invocation_count, size_z_shift, workgroups_x_shift) + 1;
|
||||
|
||||
unsigned groups_x = bits(p->invocation_count, workgroups_x_shift, workgroups_y_shift) + 1;
|
||||
unsigned groups_y = bits(p->invocation_count, workgroups_y_shift, workgroups_z_shift) + 1;
|
||||
unsigned groups_z = bits(p->invocation_count, workgroups_z_shift, 32) + 1;
|
||||
unsigned groups_x = bits(invocation.invocations, invocation.workgroups_x_shift, invocation.workgroups_y_shift) + 1;
|
||||
unsigned groups_y = bits(invocation.invocations, invocation.workgroups_y_shift, invocation.workgroups_z_shift) + 1;
|
||||
unsigned groups_z = bits(invocation.invocations, invocation.workgroups_z_shift, 32) + 1;
|
||||
|
||||
/* Even though we have this decoded, we want to ensure that the
|
||||
* representation is "unique" so we don't lose anything by printing only
|
||||
|
@ -1272,30 +1268,17 @@ pandecode_vertex_tiler_prefix(struct mali_vertex_tiler_prefix *p, int job_no, bo
|
|||
* decode and pack it ourselves! If it is bit exact with what we
|
||||
* decoded, we're good to go. */
|
||||
|
||||
struct mali_vertex_tiler_prefix ref;
|
||||
struct mali_invocation_packed ref;
|
||||
panfrost_pack_work_groups_compute(&ref, groups_x, groups_y, groups_z, size_x, size_y, size_z, graphics);
|
||||
|
||||
bool canonical =
|
||||
(p->invocation_count == ref.invocation_count) &&
|
||||
(p->invocation_shifts == ref.invocation_shifts);
|
||||
|
||||
if (!canonical) {
|
||||
if (memcmp(&ref, &invocation_packed, sizeof(ref))) {
|
||||
pandecode_msg("XXX: non-canonical workgroups packing\n");
|
||||
pandecode_msg("expected: %X, %X",
|
||||
ref.invocation_count,
|
||||
ref.invocation_shifts);
|
||||
|
||||
pandecode_prop("invocation_count = 0x%" PRIx32, p->invocation_count);
|
||||
pandecode_prop("size_y_shift = %d", size_y_shift);
|
||||
pandecode_prop("size_z_shift = %d", size_z_shift);
|
||||
pandecode_prop("workgroups_x_shift = %d", workgroups_x_shift);
|
||||
pandecode_prop("workgroups_y_shift = %d", workgroups_y_shift);
|
||||
pandecode_prop("workgroups_z_shift = %d", workgroups_z_shift);
|
||||
pandecode_prop("workgroups_x_shift_2 = %d", workgroups_x_shift_2);
|
||||
MALI_INVOCATION_print(pandecode_dump_stream, &invocation, 1 * 2);
|
||||
}
|
||||
|
||||
/* Regardless, print the decode */
|
||||
pandecode_msg("size (%d, %d, %d), count (%d, %d, %d)\n",
|
||||
fprintf(pandecode_dump_stream,
|
||||
"Invocation (%d, %d, %d) x (%d, %d, %d)\n",
|
||||
size_x, size_y, size_z,
|
||||
groups_x, groups_y, groups_z);
|
||||
|
||||
|
|
|
@ -346,6 +346,7 @@ panfrost_load_midg(
|
|||
struct midgard_payload_vertex_tiler payload = {};
|
||||
struct mali_primitive_packed primitive;
|
||||
struct mali_draw_packed draw;
|
||||
struct mali_invocation_packed invocation;
|
||||
|
||||
pan_pack(&draw, DRAW, cfg) {
|
||||
cfg.unknown_1 = 0x7;
|
||||
|
@ -365,10 +366,11 @@ panfrost_load_midg(
|
|||
cfg.unknown_3 = 6;
|
||||
}
|
||||
|
||||
memcpy(&payload.prefix.primitive, &primitive, MALI_DRAW_LENGTH);
|
||||
memcpy(&payload.postfix, &draw, MALI_DRAW_LENGTH);
|
||||
panfrost_pack_work_groups_compute(&invocation, 1, vertex_count, 1, 1, 1, 1, true);
|
||||
|
||||
panfrost_pack_work_groups_compute(&payload.prefix, 1, vertex_count, 1, 1, 1, 1, true);
|
||||
payload.prefix.primitive = primitive;
|
||||
memcpy(&payload.postfix, &draw, MALI_DRAW_LENGTH);
|
||||
payload.prefix.invocation = invocation;
|
||||
|
||||
panfrost_new_job(pool, scoreboard, MALI_JOB_TYPE_TILER, false, 0, &payload, sizeof(payload), true);
|
||||
}
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
|
||||
void
|
||||
panfrost_pack_work_groups_compute(
|
||||
struct mali_vertex_tiler_prefix *out,
|
||||
struct mali_invocation_packed *out,
|
||||
unsigned num_x,
|
||||
unsigned num_y,
|
||||
unsigned num_z,
|
||||
|
@ -43,17 +43,6 @@ panfrost_pack_work_groups_compute(
|
|||
unsigned size_z,
|
||||
bool quirk_graphics);
|
||||
|
||||
void
|
||||
panfrost_pack_work_groups_fused(
|
||||
struct mali_vertex_tiler_prefix *vertex,
|
||||
struct mali_vertex_tiler_prefix *tiler,
|
||||
unsigned num_x,
|
||||
unsigned num_y,
|
||||
unsigned num_z,
|
||||
unsigned size_x,
|
||||
unsigned size_y,
|
||||
unsigned size_z);
|
||||
|
||||
/* Tiler structure size computation */
|
||||
|
||||
unsigned
|
||||
|
|
|
@ -41,7 +41,7 @@
|
|||
|
||||
void
|
||||
panfrost_pack_work_groups_compute(
|
||||
struct mali_vertex_tiler_prefix *out,
|
||||
struct mali_invocation_packed *out,
|
||||
unsigned num_x,
|
||||
unsigned num_y,
|
||||
unsigned num_z,
|
||||
|
@ -77,53 +77,24 @@ panfrost_pack_work_groups_compute(
|
|||
shifts[i + 1] = shifts[i] + bit_count;
|
||||
}
|
||||
|
||||
/* Quirk: for non-instanced graphics, the blob sets workgroups_z_shift
|
||||
* = 32. This doesn't appear to matter to the hardware, but it's good
|
||||
* to be bit-identical. */
|
||||
pan_pack(out, INVOCATION, cfg) {
|
||||
cfg.invocations = packed;
|
||||
cfg.size_y_shift = shifts[1];
|
||||
cfg.size_z_shift = shifts[2];
|
||||
cfg.workgroups_x_shift = shifts[3];
|
||||
cfg.workgroups_y_shift = shifts[4];
|
||||
cfg.workgroups_z_shift = shifts[5];
|
||||
|
||||
if (quirk_graphics && (num_z <= 1))
|
||||
shifts[5] = 32;
|
||||
/* Quirk: for non-instanced graphics, the blob sets
|
||||
* workgroups_z_shift = 32. This doesn't appear to matter to
|
||||
* the hardware, but it's good to be bit-identical. */
|
||||
|
||||
/* Quirk: for graphics, workgroups_x_shift_2 must be at least 2,
|
||||
* whereas for OpenCL it is simply equal to workgroups_x_shift. For GL
|
||||
* compute, it is always 2 if no barriers are in use, but is equal to
|
||||
* workgroups_x_shift if barriers are in use. */
|
||||
if (quirk_graphics && (num_z <= 1))
|
||||
cfg.workgroups_z_shift = 32;
|
||||
|
||||
unsigned shift_2 = shifts[3];
|
||||
/* Quirk: for graphics, >= 2. For compute, 2 without barriers
|
||||
* but equal to workgroups_x_shift with barriers */
|
||||
|
||||
if (quirk_graphics)
|
||||
shift_2 = MAX2(shift_2, 2);
|
||||
|
||||
/* Pack them in */
|
||||
uint32_t packed_shifts =
|
||||
(shifts[1] << 0) |
|
||||
(shifts[2] << 5) |
|
||||
(shifts[3] << 10) |
|
||||
(shifts[4] << 16) |
|
||||
(shifts[5] << 22) |
|
||||
(shift_2 << 28);
|
||||
|
||||
/* Upload the packed bitfields */
|
||||
out->invocation_count = packed;
|
||||
out->invocation_shifts = packed_shifts;
|
||||
cfg.unknown_shift = quirk_graphics ? 2 : cfg.workgroups_x_shift;
|
||||
}
|
||||
}
|
||||
|
||||
/* Packs vertex/tiler descriptors simultaneously */
|
||||
void
|
||||
panfrost_pack_work_groups_fused(
|
||||
struct mali_vertex_tiler_prefix *vertex,
|
||||
struct mali_vertex_tiler_prefix *tiler,
|
||||
unsigned num_x,
|
||||
unsigned num_y,
|
||||
unsigned num_z,
|
||||
unsigned size_x,
|
||||
unsigned size_y,
|
||||
unsigned size_z)
|
||||
{
|
||||
panfrost_pack_work_groups_compute(vertex, num_x, num_y, num_z, size_x, size_y, size_z, true);
|
||||
|
||||
/* Copy results over */
|
||||
tiler->invocation_count = vertex->invocation_count;
|
||||
tiler->invocation_shifts = vertex->invocation_shifts;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue