From 7cc4c8c5e568be9d1a5660301edcb5913131201f Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Tue, 11 May 2021 11:16:46 +0200 Subject: [PATCH] pan/bi: Add support for gl_{BaseVertex,BaseInstance} Extend the VERTEX_INSTANCE_OFFSETS sysval to pass BaseVertex/BaseInstance information to the shader. Signed-off-by: Boris Brezillon Acked-by: Alyssa Rosenzweig Part-of: --- src/gallium/drivers/panfrost/pan_cmdstream.c | 18 ++++++ src/gallium/drivers/panfrost/pan_context.c | 7 ++ src/gallium/drivers/panfrost/pan_context.h | 4 ++ src/gallium/drivers/panfrost/pan_screen.c | 3 + src/panfrost/bifrost/bifrost_compile.c | 8 +++ src/panfrost/lib/pan_indirect_draw.c | 67 ++++++++++++++++---- src/panfrost/lib/pan_indirect_draw.h | 2 + src/panfrost/util/pan_sysval.c | 2 + 8 files changed, 99 insertions(+), 12 deletions(-) diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c index 1d5a4a6e4ca..930d37e7bfe 100644 --- a/src/gallium/drivers/panfrost/pan_cmdstream.c +++ b/src/gallium/drivers/panfrost/pan_cmdstream.c @@ -1083,8 +1083,14 @@ panfrost_upload_sysvals(struct panfrost_batch *batch, case PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS: batch->ctx->first_vertex_sysval_ptr = ptr->gpu + (i * sizeof(*uniforms)); + batch->ctx->base_vertex_sysval_ptr = + batch->ctx->first_vertex_sysval_ptr + 4; + batch->ctx->base_instance_sysval_ptr = + batch->ctx->first_vertex_sysval_ptr + 8; uniforms[i].u[0] = batch->ctx->offset_start; + uniforms[i].u[1] = batch->ctx->base_vertex; + uniforms[i].u[2] = batch->ctx->base_instance; break; default: assert(0); @@ -1200,6 +1206,12 @@ panfrost_emit_const_buf(struct panfrost_batch *batch, case 0: batch->ctx->first_vertex_sysval_ptr = ptr; break; + case 1: + batch->ctx->base_vertex_sysval_ptr = ptr; + break; + case 2: + batch->ctx->base_instance_sysval_ptr = ptr; + break; default: unreachable("Invalid vertex/instance offset component\n"); } @@ -1730,6 +1742,12 @@ panfrost_emit_vertex_data(struct panfrost_batch *batch, /* BOs aligned to 4k so guaranteed aligned to 64 */ src_offset += (buf->buffer_offset & 63); + /* Base instance offset */ + if (ctx->base_instance && so->pipe[i].instance_divisor) { + src_offset += (ctx->base_instance * buf->stride) / + so->pipe[i].instance_divisor; + } + /* Also, somewhat obscurely per-instance data needs to be * offset in response to a delayed start in an indexed draw */ diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c index 98cf726aa09..fe06b5adab4 100644 --- a/src/gallium/drivers/panfrost/pan_context.c +++ b/src/gallium/drivers/panfrost/pan_context.c @@ -450,6 +450,8 @@ panfrost_direct_draw(struct panfrost_context *ctx, ctx->indirect_draw = false; ctx->vertex_count = draw->count + (info->index_size ? abs(draw->index_bias) : 0); ctx->instance_count = info->instance_count; + ctx->base_vertex = info->index_size ? draw->index_bias : 0; + ctx->base_instance = info->start_instance; ctx->active_prim = info->mode; struct panfrost_ptr tiler = @@ -613,6 +615,9 @@ panfrost_indirect_draw(struct panfrost_context *ctx, * vertex shader uses gl_VertexID or gl_BaseVertex. */ ctx->first_vertex_sysval_ptr = 0; + ctx->base_vertex_sysval_ptr = 0; + ctx->base_instance_sysval_ptr = 0; + bool point_coord_replace = (info->mode == PIPE_PRIM_POINTS); panfrost_emit_varying_descriptor(batch, 0, @@ -660,6 +665,8 @@ panfrost_indirect_draw(struct panfrost_context *ctx, .draw_buf = draw_buf->image.data.bo->ptr.gpu + indirect->offset, .index_buf = index_buf ? index_buf->ptr.gpu : 0, .first_vertex_sysval = ctx->first_vertex_sysval_ptr, + .base_vertex_sysval = ctx->base_vertex_sysval_ptr, + .base_instance_sysval = ctx->base_instance_sysval_ptr, .vertex_job = vertex.gpu, .tiler_job = tiler.gpu, .attrib_bufs = attrib_bufs, diff --git a/src/gallium/drivers/panfrost/pan_context.h b/src/gallium/drivers/panfrost/pan_context.h index de2ac6cc7b6..b825b13c677 100644 --- a/src/gallium/drivers/panfrost/pan_context.h +++ b/src/gallium/drivers/panfrost/pan_context.h @@ -137,7 +137,11 @@ struct panfrost_context { unsigned vertex_count; unsigned instance_count; unsigned offset_start; + unsigned base_vertex; + unsigned base_instance; mali_ptr first_vertex_sysval_ptr; + mali_ptr base_vertex_sysval_ptr; + mali_ptr base_instance_sysval_ptr; enum pipe_prim_type active_prim; /* If instancing is enabled, vertex count padded for instance; if diff --git a/src/gallium/drivers/panfrost/pan_screen.c b/src/gallium/drivers/panfrost/pan_screen.c index 12ae7fc5797..d8dffc47868 100644 --- a/src/gallium/drivers/panfrost/pan_screen.c +++ b/src/gallium/drivers/panfrost/pan_screen.c @@ -310,6 +310,9 @@ panfrost_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_DRAW_INDIRECT: return has_heap && is_deqp; + case PIPE_CAP_START_INSTANCE: + return pan_is_bifrost(dev) && is_deqp; + default: return u_pipe_screen_get_param_defaults(screen, param); } diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c index 0045a8fdf6d..98fd1d34f3e 100644 --- a/src/panfrost/bifrost/bifrost_compile.c +++ b/src/panfrost/bifrost/bifrost_compile.c @@ -1158,6 +1158,14 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr) bi_load_sysval_nir(b, instr, 1, 0); break; + case nir_intrinsic_load_base_vertex: + bi_load_sysval_nir(b, instr, 1, 4); + break; + + case nir_intrinsic_load_base_instance: + bi_load_sysval_nir(b, instr, 1, 8); + break; + case nir_intrinsic_get_ssbo_size: bi_load_sysval_nir(b, instr, 1, 8); break; diff --git a/src/panfrost/lib/pan_indirect_draw.c b/src/panfrost/lib/pan_indirect_draw.c index 78a592098c4..16721c4f3fc 100644 --- a/src/panfrost/lib/pan_indirect_draw.c +++ b/src/panfrost/lib/pan_indirect_draw.c @@ -55,6 +55,7 @@ struct draw_data { nir_ssa_def *index_buf; nir_ssa_def *restart_index; nir_ssa_def *vertex_count; + nir_ssa_def *start_instance; nir_ssa_def *instance_count; nir_ssa_def *vertex_start; nir_ssa_def *index_bias; @@ -73,6 +74,8 @@ struct jobs_data { nir_ssa_def *tiler_job; nir_ssa_def *base_vertex_offset; nir_ssa_def *first_vertex_sysval; + nir_ssa_def *base_vertex_sysval; + nir_ssa_def *base_instance_sysval; nir_ssa_def *offset_start; nir_ssa_def *invocation; }; @@ -160,6 +163,9 @@ struct indirect_draw_inputs { /* {base,first}_{vertex,instance} sysvals */ mali_ptr first_vertex_sysval; + mali_ptr base_vertex_sysval; + mali_ptr base_instance_sysval; + /* Pointers to various cmdstream structs that need to be patched */ mali_ptr vertex_job; mali_ptr tiler_job; @@ -318,6 +324,8 @@ extract_inputs(struct indirect_draw_shader_builder *builder) return; builder->jobs.first_vertex_sysval = get_input_field(b, first_vertex_sysval); + builder->jobs.base_vertex_sysval = get_input_field(b, base_vertex_sysval); + builder->jobs.base_instance_sysval = get_input_field(b, base_instance_sysval); builder->jobs.vertex_job = get_input_field(b, vertex_job); builder->jobs.tiler_job = get_input_field(b, tiler_job); builder->attribs.attrib_bufs = get_input_field(b, attrib_bufs); @@ -506,7 +514,8 @@ update_vertex_attrib_buf(struct indirect_draw_shader_builder *builder, static void adjust_attrib_offset(struct indirect_draw_shader_builder *builder, - nir_ssa_def *attrib_ptr, nir_ssa_def *attrib_buf_ptr) + nir_ssa_def *attrib_ptr, nir_ssa_def *attrib_buf_ptr, + nir_ssa_def *instance_div) { nir_builder *b = &builder->b; nir_ssa_def *zero = nir_imm_int(b, 0); @@ -515,18 +524,34 @@ adjust_attrib_offset(struct indirect_draw_shader_builder *builder, nir_iand(b, nir_ine(b, builder->jobs.offset_start, zero), nir_ige(b, builder->draw.instance_count, two)); - IF (sub_cur_offset) { + nir_ssa_def *add_base_inst_offset = + nir_iand(b, nir_ine(b, builder->draw.start_instance, zero), + nir_ine(b, instance_div, zero)); + + IF (nir_ior(b, sub_cur_offset, add_base_inst_offset)) { + nir_ssa_def *offset = + load_global(b, get_address_imm(b, attrib_ptr, WORD(1)), 1, 32); + nir_ssa_def *stride = + load_global(b, get_address_imm(b, attrib_buf_ptr, WORD(2)), 1, 32); + /* Per-instance data needs to be offset in response to a * delayed start in an indexed draw. */ - nir_ssa_def *stride = - load_global(b, get_address_imm(b, attrib_buf_ptr, WORD(2)), 1, 32); - nir_ssa_def *offset = - load_global(b, get_address_imm(b, attrib_ptr, WORD(1)), 1, 32); - offset = nir_isub(b, offset, - nir_imul(b, stride, - builder->jobs.offset_start)); + IF (add_base_inst_offset) { + offset = nir_iadd(b, offset, + nir_idiv(b, + nir_imul(b, stride, + builder->draw.start_instance), + instance_div)); + } ENDIF + + IF (sub_cur_offset) { + offset = nir_isub(b, offset, + nir_imul(b, stride, + builder->jobs.offset_start)); + } ENDIF + store_global(b, get_address_imm(b, attrib_ptr, WORD(1)), offset, 1); } ENDIF @@ -600,10 +625,10 @@ update_vertex_attribs(struct indirect_draw_shader_builder *builder) } ENDIF } - nir_ssa_def *div = + nir_ssa_def *instance_div = load_global(b, get_address_imm(b, attrib_buf_ptr, WORD(7)), 1, 32); - div = nir_imul(b, div, builder->instance_size.padded); + nir_ssa_def *div = nir_imul(b, instance_div, builder->instance_size.padded); nir_ssa_def *multi_instance = nir_ige(b, builder->draw.instance_count, nir_imm_int(b, 2)); @@ -636,7 +661,7 @@ update_vertex_attribs(struct indirect_draw_shader_builder *builder) nir_imm_int(b, 31), NULL); } ENDIF - adjust_attrib_offset(builder, attrib_ptr, attrib_buf_ptr); + adjust_attrib_offset(builder, attrib_ptr, attrib_buf_ptr, instance_div); } ELSE { IF (multi_instance) { update_vertex_attrib_buf(builder, attrib_buf_ptr, @@ -888,12 +913,14 @@ patch(struct indirect_draw_shader_builder *builder) if (index_size) { builder->draw.vertex_count = get_indexed_draw_field(b, draw_ptr, count); + builder->draw.start_instance = get_indexed_draw_field(b, draw_ptr, start_instance); builder->draw.instance_count = get_indexed_draw_field(b, draw_ptr, instance_count); builder->draw.vertex_start = get_indexed_draw_field(b, draw_ptr, start); builder->draw.index_bias = get_indexed_draw_field(b, draw_ptr, index_bias); } else { builder->draw.vertex_count = get_draw_field(b, draw_ptr, count); + builder->draw.start_instance = get_draw_field(b, draw_ptr, start_instance); builder->draw.instance_count = get_draw_field(b, draw_ptr, instance_count); builder->draw.vertex_start = get_draw_field(b, draw_ptr, start); } @@ -914,6 +941,20 @@ patch(struct indirect_draw_shader_builder *builder) store_global(b, builder->jobs.first_vertex_sysval, builder->jobs.offset_start, 1); } ENDIF + + IF (nir_ine(b, builder->jobs.base_vertex_sysval, nir_imm_int64(b, 0))) { + store_global(b, builder->jobs.base_vertex_sysval, + index_size ? + builder->draw.index_bias : + nir_imm_int(b, 0), + 1); + } ENDIF + + IF (nir_ine(b, builder->jobs.base_instance_sysval, nir_imm_int64(b, 0))) { + store_global(b, builder->jobs.base_instance_sysval, + builder->draw.start_instance, 1); + } ENDIF + } /* Search the min/max index in the range covered by the indirect draw call */ @@ -1255,6 +1296,8 @@ panfrost_emit_indirect_draw(struct pan_pool *pool, .draw_buf = draw_info->draw_buf, .index_buf = draw_info->index_buf, .first_vertex_sysval = draw_info->first_vertex_sysval, + .base_vertex_sysval = draw_info->base_vertex_sysval, + .base_instance_sysval = draw_info->base_instance_sysval, .vertex_job = draw_info->vertex_job, .tiler_job = draw_info->tiler_job, .attrib_bufs = draw_info->attrib_bufs, diff --git a/src/panfrost/lib/pan_indirect_draw.h b/src/panfrost/lib/pan_indirect_draw.h index f20d10a03b5..773bb8f7878 100644 --- a/src/panfrost/lib/pan_indirect_draw.h +++ b/src/panfrost/lib/pan_indirect_draw.h @@ -32,6 +32,8 @@ struct pan_indirect_draw_info { mali_ptr draw_buf; mali_ptr index_buf; mali_ptr first_vertex_sysval; + mali_ptr base_vertex_sysval; + mali_ptr base_instance_sysval; mali_ptr vertex_job; mali_ptr tiler_job; mali_ptr attrib_bufs; diff --git a/src/panfrost/util/pan_sysval.c b/src/panfrost/util/pan_sysval.c index 7d17cf0cb9d..128183a1b0a 100644 --- a/src/panfrost/util/pan_sysval.c +++ b/src/panfrost/util/pan_sysval.c @@ -79,6 +79,8 @@ panfrost_nir_sysval_for_intrinsic(nir_intrinsic_instr *instr) case nir_intrinsic_load_sample_positions_pan: return PAN_SYSVAL_SAMPLE_POSITIONS; case nir_intrinsic_load_first_vertex: + case nir_intrinsic_load_base_vertex: + case nir_intrinsic_load_base_instance: return PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS; case nir_intrinsic_load_ssbo_address: case nir_intrinsic_get_ssbo_size: