panfrost: Merge attribute packing routines

In preparation for streamlining the packing, we need related code in one
place.

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Reviewed-by: Tomeu Vizoso <tomeu.vizoso@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6326>
Alyssa Rosenzweig, 2020-08-13 14:32:23 -04:00; committed by Marge Bot
parent 2c8a722b85
commit 76de3e691c
4 changed files with 49 additions and 82 deletions

@@ -1337,23 +1337,6 @@ panfrost_emit_sampler_descriptors(struct panfrost_batch *batch,
postfix->sampler_descriptor = T.gpu;
}
void
panfrost_emit_vertex_attr_meta(struct panfrost_batch *batch,
struct mali_vertex_tiler_postfix *vertex_postfix)
{
struct panfrost_context *ctx = batch->ctx;
if (!ctx->vertex)
return;
struct panfrost_vertex_state *so = ctx->vertex;
panfrost_vertex_state_upd_attr_offs(ctx, vertex_postfix);
vertex_postfix->attribute_meta = panfrost_pool_upload(&batch->pool, so->hw,
sizeof(*so->hw) *
PAN_MAX_ATTRIBUTE);
}
void
panfrost_emit_vertex_data(struct panfrost_batch *batch,
struct mali_vertex_tiler_postfix *vertex_postfix)
@@ -1456,10 +1439,58 @@ panfrost_emit_vertex_data(struct panfrost_batch *batch,
panfrost_instance_id(ctx->padded_count, &attrs[k]);
so->hw[PAN_INSTANCE_ID].index = k++;
/* Upload whatever we emitted and go */
/* Fixup offsets for the second pass. Recall that the hardware
* calculates attribute addresses as:
*
* addr = base + (stride * vtx) + src_offset;
*
* However, on Mali, base must be aligned to 64-bytes, so we
* instead let:
*
* base' = base & ~63 = base - (base & 63)
*
* To compensate when using base' (see emit_vertex_data), we have
* to adjust src_offset by the masked off piece:
*
* addr' = base' + (stride * vtx) + (src_offset + (base & 63))
* = base - (base & 63) + (stride * vtx) + src_offset + (base & 63)
* = base + (stride * vtx) + src_offset
* = addr;
*
* QED.
*/
unsigned start = vertex_postfix->offset_start;
for (unsigned i = 0; i < so->num_elements; ++i) {
unsigned vbi = so->pipe[i].vertex_buffer_index;
struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[vbi];
/* Adjust by the masked off bits of the offset. Make sure we
* read src_offset from so->hw (which is not GPU visible)
* rather than target (which is) due to caching effects */
unsigned src_offset = so->pipe[i].src_offset;
/* BOs aligned to 4k so guaranteed aligned to 64 */
src_offset += (buf->buffer_offset & 63);
/* Also, somewhat obscurely per-instance data needs to be
* offset in response to a delayed start in an indexed draw */
if (so->pipe[i].instance_divisor && ctx->instance_count > 1 && start)
src_offset -= buf->stride * start;
so->hw[i].src_offset = src_offset;
}
vertex_postfix->attributes = panfrost_pool_upload(&batch->pool, attrs,
k * sizeof(*attrs));
vertex_postfix->attribute_meta = panfrost_pool_upload(&batch->pool, so->hw,
sizeof(*so->hw) *
PAN_MAX_ATTRIBUTE);
}
static mali_ptr

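As an aside for readers checking the arithmetic in the comment above: the identity it derives can be verified with a small, self-contained C program. This is only an illustrative sketch, not part of the commit, and the names (base, stride, vtx, src_offset) are placeholders rather than driver variables.

#include <assert.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        /* Deliberately misaligned buffer address: low 6 bits are nonzero. */
        uint64_t base = 0x10000 + 37;
        unsigned stride = 16, vtx = 5, src_offset = 12;

        /* What the hardware computes with the raw base. */
        uint64_t addr = base + (uint64_t)stride * vtx + src_offset;

        /* What the driver feeds it instead: base aligned down to 64 bytes,
         * with the masked-off low bits folded into src_offset. */
        uint64_t base_aligned = base & ~UINT64_C(63);
        unsigned src_offset_fixed = src_offset + (unsigned)(base & 63);
        uint64_t addr_fixed = base_aligned + (uint64_t)stride * vtx + src_offset_fixed;

        assert(addr == addr_fixed);
        printf("0x%" PRIx64 " == 0x%" PRIx64 "\n", addr, addr_fixed);
        return 0;
}

As the in-code comment notes, BOs are 4 KiB aligned, so only buf->buffer_offset contributes to the low 6 bits of base; adding (buf->buffer_offset & 63) in the loop is therefore equivalent to adding (base & 63) in this sketch.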

@@ -80,10 +80,6 @@ panfrost_emit_sampler_descriptors(struct panfrost_batch *batch,
enum pipe_shader_type stage,
struct mali_vertex_tiler_postfix *postfix);
void
panfrost_emit_vertex_attr_meta(struct panfrost_batch *batch,
struct mali_vertex_tiler_postfix *vertex_postfix);
void
panfrost_emit_vertex_data(struct panfrost_batch *batch,
struct mali_vertex_tiler_postfix *vertex_postfix);

@@ -163,61 +163,6 @@ panfrost_writes_point_size(struct panfrost_context *ctx)
return vs->writes_point_size && ctx->active_prim == PIPE_PRIM_POINTS;
}
void
panfrost_vertex_state_upd_attr_offs(struct panfrost_context *ctx,
struct mali_vertex_tiler_postfix *vertex_postfix)
{
if (!ctx->vertex)
return;
struct panfrost_vertex_state *so = ctx->vertex;
/* Fixup offsets for the second pass. Recall that the hardware
* calculates attribute addresses as:
*
* addr = base + (stride * vtx) + src_offset;
*
* However, on Mali, base must be aligned to 64-bytes, so we
* instead let:
*
* base' = base & ~63 = base - (base & 63)
*
* To compensate when using base' (see emit_vertex_data), we have
* to adjust src_offset by the masked off piece:
*
* addr' = base' + (stride * vtx) + (src_offset + (base & 63))
* = base - (base & 63) + (stride * vtx) + src_offset + (base & 63)
* = base + (stride * vtx) + src_offset
* = addr;
*
* QED.
*/
unsigned start = vertex_postfix->offset_start;
for (unsigned i = 0; i < so->num_elements; ++i) {
unsigned vbi = so->pipe[i].vertex_buffer_index;
struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[vbi];
/* Adjust by the masked off bits of the offset. Make sure we
* read src_offset from so->hw (which is not GPU visible)
* rather than target (which is) due to caching effects */
unsigned src_offset = so->pipe[i].src_offset;
/* BOs aligned to 4k so guaranteed aligned to 64 */
src_offset += (buf->buffer_offset & 63);
/* Also, somewhat obscurely per-instance data needs to be
* offset in response to a delayed start in an indexed draw */
if (so->pipe[i].instance_divisor && ctx->instance_count > 1 && start)
src_offset -= buf->stride * start;
so->hw[i].src_offset = src_offset;
}
}
/* Compute number of UBOs active (more specifically, compute the highest UBO
* number addressable -- if there are gaps, include them in the count anyway).
* We always include UBO #0 in the count, since we *need* uniforms enabled for
@@ -422,7 +367,6 @@ panfrost_draw_vbo(
&primitive_size);
panfrost_emit_shader_meta(batch, PIPE_SHADER_VERTEX, &vertex_postfix);
panfrost_emit_shader_meta(batch, PIPE_SHADER_FRAGMENT, &tiler_postfix);
panfrost_emit_vertex_attr_meta(batch, &vertex_postfix);
panfrost_emit_sampler_descriptors(batch, PIPE_SHADER_VERTEX, &vertex_postfix);
panfrost_emit_sampler_descriptors(batch, PIPE_SHADER_FRAGMENT, &tiler_postfix);
panfrost_emit_texture_descriptors(batch, PIPE_SHADER_VERTEX, &vertex_postfix);

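One more note on the fixup loop that the hunks above move: the correction guarded by instance_divisor && instance_count > 1 && start subtracts buf->stride * start from src_offset. Read as compensation for the draw's delayed start, the arithmetic works out as in the sketch below. The premise that the hardware effectively biases the per-instance record index by offset_start is my reading of the comment, not something the diff states, and all names are illustrative.

#include <assert.h>
#include <stdint.h>

int main(void)
{
        int64_t base = 0x20000, stride = 32, src_offset = 8;
        int64_t start = 4;     /* plays the role of offset_start  */
        int64_t instance = 2;  /* the per-instance record we want */

        /* Address we actually want for this record. */
        int64_t wanted = base + stride * instance + src_offset;

        /* If the hardware walks the stream as though it also began
         * `start` records in, it lands stride * start bytes too far... */
        int64_t biased = base + stride * (instance + start) + src_offset;
        assert(biased - wanted == stride * start);

        /* ...so folding -stride * start into src_offset cancels the bias. */
        int64_t corrected = base + stride * (instance + start)
                          + (src_offset - stride * start);
        assert(corrected == wanted);
        return 0;
}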

@@ -309,10 +309,6 @@ panfrost_invalidate_frame(struct panfrost_context *ctx);
bool
panfrost_writes_point_size(struct panfrost_context *ctx);
void
panfrost_vertex_state_upd_attr_offs(struct panfrost_context *ctx,
struct mali_vertex_tiler_postfix *vertex_postfix);
struct panfrost_transfer
panfrost_vertex_tiler_job(struct panfrost_context *ctx, bool is_tiler);