diff --git a/src/gallium/drivers/panfrost/pan_compute.c b/src/gallium/drivers/panfrost/pan_compute.c index 53a23bcb6a0..cef9ec46aaa 100644 --- a/src/gallium/drivers/panfrost/pan_compute.c +++ b/src/gallium/drivers/panfrost/pan_compute.c @@ -105,6 +105,7 @@ panfrost_launch_grid(struct pipe_context *pipe, /* TODO: Stub */ struct midgard_payload_vertex_tiler payload = { 0 }; + struct mali_invocation_packed invocation; struct mali_draw_packed postfix; /* We implement OpenCL inputs as uniforms (or a UBO -- same thing), so @@ -139,12 +140,13 @@ panfrost_launch_grid(struct pipe_context *pipe, /* Invoke according to the grid info */ - panfrost_pack_work_groups_compute(&payload.prefix, + panfrost_pack_work_groups_compute(&invocation, info->grid[0], info->grid[1], info->grid[2], info->block[0], info->block[1], info->block[2], false); + payload.prefix.invocation = invocation; panfrost_new_job(&batch->pool, &batch->scoreboard, MALI_JOB_TYPE_COMPUTE, true, 0, &payload, diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c index 4da307dcfcd..640fe9ee309 100644 --- a/src/gallium/drivers/panfrost/pan_context.c +++ b/src/gallium/drivers/panfrost/pan_context.c @@ -327,6 +327,7 @@ panfrost_draw_vbo( struct mali_vertex_tiler_prefix vertex_prefix = { 0 }, tiler_prefix = { 0 }; struct mali_draw_packed vertex_postfix, tiler_postfix; struct mali_primitive_packed primitive; + struct mali_invocation_packed invocation; union midgard_primitive_size primitive_size; unsigned vertex_count = ctx->vertex_count; @@ -373,9 +374,12 @@ panfrost_draw_vbo( panfrost_statistics_record(ctx, info); - panfrost_pack_work_groups_fused(&vertex_prefix, &tiler_prefix, + panfrost_pack_work_groups_compute(&invocation, 1, vertex_count, info->instance_count, - 1, 1, 1); + 1, 1, 1, true); + + vertex_prefix.invocation = invocation; + tiler_prefix.invocation = invocation; /* Emit all sort of descriptors. */ mali_ptr varyings = 0, vs_vary = 0, fs_vary = 0, pos = 0, psiz = 0; diff --git a/src/panfrost/bifrost/test/bi_submit.c b/src/panfrost/bifrost/test/bi_submit.c index 8b46569971f..73c971df4dd 100644 --- a/src/panfrost/bifrost/test/bi_submit.c +++ b/src/panfrost/bifrost/test/bi_submit.c @@ -199,11 +199,15 @@ bit_vertex(struct panfrost_device *dev, panfrost_program prog, }, }; - panfrost_pack_work_groups_compute(&payload.prefix, + struct mali_invocation_packed invocation; + + panfrost_pack_work_groups_compute(&invocation, 1, 1, 1, 1, 1, 1, true); + payload.prefix.invocation = invocation; + struct panfrost_bo *bos[] = { scratchpad, shmem, shader, shader_desc, ubo, var, attr }; diff --git a/src/panfrost/include/panfrost-job.h b/src/panfrost/include/panfrost-job.h index 8cb30ed0cbf..879023eacb4 100644 --- a/src/panfrost/include/panfrost-job.h +++ b/src/panfrost/include/panfrost-job.h @@ -421,42 +421,7 @@ struct mali_payload_write_value { */ struct mali_vertex_tiler_prefix { - /* This is a dynamic bitfield containing the following things in this order: - * - * - gl_WorkGroupSize.x - * - gl_WorkGroupSize.y - * - gl_WorkGroupSize.z - * - gl_NumWorkGroups.x - * - gl_NumWorkGroups.y - * - gl_NumWorkGroups.z - * - * The number of bits allocated for each number is based on the *_shift - * fields below. For example, workgroups_y_shift gives the bit that - * gl_NumWorkGroups.y starts at, and workgroups_z_shift gives the bit - * that gl_NumWorkGroups.z starts at (and therefore one after the bit - * that gl_NumWorkGroups.y ends at). The actual value for each gl_* - * value is one more than the stored value, since if any of the values - * are zero, then there would be no invocations (and hence no job). If - * there were 0 bits allocated to a given field, then it must be zero, - * and hence the real value is one. - * - * Vertex jobs reuse the same job dispatch mechanism as compute jobs, - * effectively doing glDispatchCompute(1, vertex_count, instance_count) - * where vertex count is the number of vertices. - */ - u32 invocation_count; - - /* Bitfield for shifts: - * - * size_y_shift : 5 - * size_z_shift : 5 - * workgroups_x_shift : 6 - * workgroups_y_shift : 6 - * workgroups_z_shift : 6 - * workgroups_x_shift_2 : 4 - */ - u32 invocation_shifts; - + struct mali_invocation_packed invocation; struct mali_primitive_packed primitive; } __attribute__((packed)); diff --git a/src/panfrost/lib/decode.c b/src/panfrost/lib/decode.c index 78b0a542ae5..c6e23b1eda9 100644 --- a/src/panfrost/lib/decode.c +++ b/src/panfrost/lib/decode.c @@ -1248,21 +1248,17 @@ pandecode_vertex_tiler_prefix(struct mali_vertex_tiler_prefix *p, int job_no, bo /* Decode invocation_count. See the comment before the definition of * invocation_count for an explanation. */ + struct MALI_INVOCATION invocation; + struct mali_invocation_packed invocation_packed = p->invocation; + MALI_INVOCATION_unpack((const uint8_t *) &invocation_packed, &invocation); - unsigned size_y_shift = bits(p->invocation_shifts, 0, 5); - unsigned size_z_shift = bits(p->invocation_shifts, 5, 10); - unsigned workgroups_x_shift = bits(p->invocation_shifts, 10, 16); - unsigned workgroups_y_shift = bits(p->invocation_shifts, 16, 22); - unsigned workgroups_z_shift = bits(p->invocation_shifts, 22, 28); - unsigned workgroups_x_shift_2 = bits(p->invocation_shifts, 28, 32); + unsigned size_x = bits(invocation.invocations, 0, invocation.size_y_shift) + 1; + unsigned size_y = bits(invocation.invocations, invocation.size_y_shift, invocation.size_z_shift) + 1; + unsigned size_z = bits(invocation.invocations, invocation.size_z_shift, invocation.workgroups_x_shift) + 1; - unsigned size_x = bits(p->invocation_count, 0, size_y_shift) + 1; - unsigned size_y = bits(p->invocation_count, size_y_shift, size_z_shift) + 1; - unsigned size_z = bits(p->invocation_count, size_z_shift, workgroups_x_shift) + 1; - - unsigned groups_x = bits(p->invocation_count, workgroups_x_shift, workgroups_y_shift) + 1; - unsigned groups_y = bits(p->invocation_count, workgroups_y_shift, workgroups_z_shift) + 1; - unsigned groups_z = bits(p->invocation_count, workgroups_z_shift, 32) + 1; + unsigned groups_x = bits(invocation.invocations, invocation.workgroups_x_shift, invocation.workgroups_y_shift) + 1; + unsigned groups_y = bits(invocation.invocations, invocation.workgroups_y_shift, invocation.workgroups_z_shift) + 1; + unsigned groups_z = bits(invocation.invocations, invocation.workgroups_z_shift, 32) + 1; /* Even though we have this decoded, we want to ensure that the * representation is "unique" so we don't lose anything by printing only @@ -1272,30 +1268,17 @@ pandecode_vertex_tiler_prefix(struct mali_vertex_tiler_prefix *p, int job_no, bo * decode and pack it ourselves! If it is bit exact with what we * decoded, we're good to go. */ - struct mali_vertex_tiler_prefix ref; + struct mali_invocation_packed ref; panfrost_pack_work_groups_compute(&ref, groups_x, groups_y, groups_z, size_x, size_y, size_z, graphics); - bool canonical = - (p->invocation_count == ref.invocation_count) && - (p->invocation_shifts == ref.invocation_shifts); - - if (!canonical) { + if (memcmp(&ref, &invocation_packed, sizeof(ref))) { pandecode_msg("XXX: non-canonical workgroups packing\n"); - pandecode_msg("expected: %X, %X", - ref.invocation_count, - ref.invocation_shifts); - - pandecode_prop("invocation_count = 0x%" PRIx32, p->invocation_count); - pandecode_prop("size_y_shift = %d", size_y_shift); - pandecode_prop("size_z_shift = %d", size_z_shift); - pandecode_prop("workgroups_x_shift = %d", workgroups_x_shift); - pandecode_prop("workgroups_y_shift = %d", workgroups_y_shift); - pandecode_prop("workgroups_z_shift = %d", workgroups_z_shift); - pandecode_prop("workgroups_x_shift_2 = %d", workgroups_x_shift_2); + MALI_INVOCATION_print(pandecode_dump_stream, &invocation, 1 * 2); } /* Regardless, print the decode */ - pandecode_msg("size (%d, %d, %d), count (%d, %d, %d)\n", + fprintf(pandecode_dump_stream, + "Invocation (%d, %d, %d) x (%d, %d, %d)\n", size_x, size_y, size_z, groups_x, groups_y, groups_z); diff --git a/src/panfrost/lib/pan_blit.c b/src/panfrost/lib/pan_blit.c index f9ca87ea588..79a91200d8e 100644 --- a/src/panfrost/lib/pan_blit.c +++ b/src/panfrost/lib/pan_blit.c @@ -346,6 +346,7 @@ panfrost_load_midg( struct midgard_payload_vertex_tiler payload = {}; struct mali_primitive_packed primitive; struct mali_draw_packed draw; + struct mali_invocation_packed invocation; pan_pack(&draw, DRAW, cfg) { cfg.unknown_1 = 0x7; @@ -365,10 +366,11 @@ panfrost_load_midg( cfg.unknown_3 = 6; } - memcpy(&payload.prefix.primitive, &primitive, MALI_DRAW_LENGTH); - memcpy(&payload.postfix, &draw, MALI_DRAW_LENGTH); + panfrost_pack_work_groups_compute(&invocation, 1, vertex_count, 1, 1, 1, 1, true); - panfrost_pack_work_groups_compute(&payload.prefix, 1, vertex_count, 1, 1, 1, 1, true); + payload.prefix.primitive = primitive; + memcpy(&payload.postfix, &draw, MALI_DRAW_LENGTH); + payload.prefix.invocation = invocation; panfrost_new_job(pool, scoreboard, MALI_JOB_TYPE_TILER, false, 0, &payload, sizeof(payload), true); } diff --git a/src/panfrost/lib/pan_encoder.h b/src/panfrost/lib/pan_encoder.h index 0471701dbfb..9433f02de61 100644 --- a/src/panfrost/lib/pan_encoder.h +++ b/src/panfrost/lib/pan_encoder.h @@ -34,7 +34,7 @@ void panfrost_pack_work_groups_compute( - struct mali_vertex_tiler_prefix *out, + struct mali_invocation_packed *out, unsigned num_x, unsigned num_y, unsigned num_z, @@ -43,17 +43,6 @@ panfrost_pack_work_groups_compute( unsigned size_z, bool quirk_graphics); -void -panfrost_pack_work_groups_fused( - struct mali_vertex_tiler_prefix *vertex, - struct mali_vertex_tiler_prefix *tiler, - unsigned num_x, - unsigned num_y, - unsigned num_z, - unsigned size_x, - unsigned size_y, - unsigned size_z); - /* Tiler structure size computation */ unsigned diff --git a/src/panfrost/lib/pan_invocation.c b/src/panfrost/lib/pan_invocation.c index cfb5becbf61..4c0f6457851 100644 --- a/src/panfrost/lib/pan_invocation.c +++ b/src/panfrost/lib/pan_invocation.c @@ -41,7 +41,7 @@ void panfrost_pack_work_groups_compute( - struct mali_vertex_tiler_prefix *out, + struct mali_invocation_packed *out, unsigned num_x, unsigned num_y, unsigned num_z, @@ -77,53 +77,24 @@ panfrost_pack_work_groups_compute( shifts[i + 1] = shifts[i] + bit_count; } - /* Quirk: for non-instanced graphics, the blob sets workgroups_z_shift - * = 32. This doesn't appear to matter to the hardware, but it's good - * to be bit-identical. */ + pan_pack(out, INVOCATION, cfg) { + cfg.invocations = packed; + cfg.size_y_shift = shifts[1]; + cfg.size_z_shift = shifts[2]; + cfg.workgroups_x_shift = shifts[3]; + cfg.workgroups_y_shift = shifts[4]; + cfg.workgroups_z_shift = shifts[5]; - if (quirk_graphics && (num_z <= 1)) - shifts[5] = 32; + /* Quirk: for non-instanced graphics, the blob sets + * workgroups_z_shift = 32. This doesn't appear to matter to + * the hardware, but it's good to be bit-identical. */ - /* Quirk: for graphics, workgroups_x_shift_2 must be at least 2, - * whereas for OpenCL it is simply equal to workgroups_x_shift. For GL - * compute, it is always 2 if no barriers are in use, but is equal to - * workgroups_x_shift is barriers are in use. */ + if (quirk_graphics && (num_z <= 1)) + cfg.workgroups_z_shift = 32; - unsigned shift_2 = shifts[3]; + /* Quirk: for graphics, >= 2. For compute, 2 without barriers + * but equal to workgroups_x_shift with barriers */ - if (quirk_graphics) - shift_2 = MAX2(shift_2, 2); - - /* Pack them in */ - uint32_t packed_shifts = - (shifts[1] << 0) | - (shifts[2] << 5) | - (shifts[3] << 10) | - (shifts[4] << 16) | - (shifts[5] << 22) | - (shift_2 << 28); - - /* Upload the packed bitfields */ - out->invocation_count = packed; - out->invocation_shifts = packed_shifts; + cfg.unknown_shift = quirk_graphics ? 2 : cfg.workgroups_x_shift; + } } - -/* Packs vertex/tiler descriptors simultaneously */ -void -panfrost_pack_work_groups_fused( - struct mali_vertex_tiler_prefix *vertex, - struct mali_vertex_tiler_prefix *tiler, - unsigned num_x, - unsigned num_y, - unsigned num_z, - unsigned size_x, - unsigned size_y, - unsigned size_z) -{ - panfrost_pack_work_groups_compute(vertex, num_x, num_y, num_z, size_x, size_y, size_z, true); - - /* Copy results over */ - tiler->invocation_count = vertex->invocation_count; - tiler->invocation_shifts = vertex->invocation_shifts; -} -