From a3d0359aff7a9be90149c416844f330b4f9a15ed Mon Sep 17 00:00:00 2001 From: Timothy Arceri Date: Tue, 27 Oct 2015 06:58:15 +1100 Subject: [PATCH] glsl: keep track of intra-stage indices for atomics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is more optimal as it means we no longer have to upload the same set of ABO surfaces to all stages in the program. This also fixes a bug where since commit c0cd5b var->data.binding was being used as a replacement for atomic buffer index, but they don't have to be the same value they just happened to end up the same when binding is 0. Reviewed-by: Francisco Jerez Reviewed-by: Samuel Iglesias Gonsálvez Cc: Ilia Mirkin Cc: Alejandro Piñeiro Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90175 --- src/glsl/link_atomics.cpp | 43 +++++++++++++++++-- src/glsl/nir/glsl_to_nir.cpp | 2 - src/glsl/nir/nir.h | 4 +- src/glsl/nir/nir_lower_atomics.c | 25 ++++++++--- src/mesa/drivers/dri/i965/brw_context.h | 2 +- .../drivers/dri/i965/brw_gs_surface_state.c | 4 +- src/mesa/drivers/dri/i965/brw_nir.c | 6 ++- src/mesa/drivers/dri/i965/brw_shader.cpp | 4 +- .../drivers/dri/i965/brw_vs_surface_state.c | 4 +- .../drivers/dri/i965/brw_wm_surface_state.c | 35 +++++++-------- src/mesa/main/mtypes.h | 5 ++- 11 files changed, 95 insertions(+), 39 deletions(-) diff --git a/src/glsl/link_atomics.cpp b/src/glsl/link_atomics.cpp index 70ef0e1c891..cdcc06d53e2 100644 --- a/src/glsl/link_atomics.cpp +++ b/src/glsl/link_atomics.cpp @@ -198,6 +198,7 @@ link_assign_atomic_counter_resources(struct gl_context *ctx, struct gl_shader_program *prog) { unsigned num_buffers; + unsigned num_atomic_buffers[MESA_SHADER_STAGES] = {}; active_atomic_buffer *abs = find_active_atomic_counters(ctx, prog, &num_buffers); @@ -242,13 +243,49 @@ link_assign_atomic_counter_resources(struct gl_context *ctx, } /* Assign stage-specific fields. */ - for (unsigned j = 0; j < MESA_SHADER_STAGES; ++j) - mab.StageReferences[j] = - (ab.stage_references[j] ? GL_TRUE : GL_FALSE); + for (unsigned j = 0; j < MESA_SHADER_STAGES; ++j) { + if (ab.stage_references[j]) { + mab.StageReferences[j] = GL_TRUE; + num_atomic_buffers[j]++; + } else { + mab.StageReferences[j] = GL_FALSE; + } + } i++; } + /* Store a list pointers to atomic buffers per stage and store the index + * to the intra-stage buffer list in uniform storage. + */ + for (unsigned j = 0; j < MESA_SHADER_STAGES; ++j) { + if (prog->_LinkedShaders[j] && num_atomic_buffers[j] > 0) { + prog->_LinkedShaders[j]->NumAtomicBuffers = num_atomic_buffers[j]; + prog->_LinkedShaders[j]->AtomicBuffers = + rzalloc_array(prog, gl_active_atomic_buffer *, + num_atomic_buffers[j]); + + unsigned intra_stage_idx = 0; + for (unsigned i = 0; i < num_buffers; i++) { + struct gl_active_atomic_buffer *atomic_buffer = + &prog->AtomicBuffers[i]; + if (atomic_buffer->StageReferences[j]) { + prog->_LinkedShaders[j]->AtomicBuffers[intra_stage_idx] = + atomic_buffer; + + for (unsigned u = 0; u < atomic_buffer->NumUniforms; u++) { + prog->UniformStorage[atomic_buffer->Uniforms[u]].opaque[j].index = + intra_stage_idx; + prog->UniformStorage[atomic_buffer->Uniforms[u]].opaque[j].active = + true; + } + + intra_stage_idx++; + } + } + } + } + delete [] abs; assert(i == num_buffers); } diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp index 9b50a93e7f6..01f16d70eb1 100644 --- a/src/glsl/nir/glsl_to_nir.cpp +++ b/src/glsl/nir/glsl_to_nir.cpp @@ -392,8 +392,6 @@ nir_visitor::visit(ir_variable *ir) var->data.index = ir->data.index; var->data.binding = ir->data.binding; - /* XXX Get rid of buffer_index */ - var->data.atomic.buffer_index = ir->data.binding; var->data.atomic.offset = ir->data.atomic.offset; var->data.image.read_only = ir->data.image_read_only; var->data.image.write_only = ir->data.image_write_only; diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index e3777f926e2..04a21a7ead6 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -308,7 +308,6 @@ typedef struct { * Location an atomic counter is stored at. */ struct { - unsigned buffer_index; unsigned offset; } atomic; @@ -1978,7 +1977,8 @@ void nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables); void nir_lower_two_sided_color(nir_shader *shader); -void nir_lower_atomics(nir_shader *shader); +void nir_lower_atomics(nir_shader *shader, + const struct gl_shader_program *shader_program); void nir_lower_to_source_mods(nir_shader *shader); bool nir_lower_gs_intrinsics(nir_shader *shader); diff --git a/src/glsl/nir/nir_lower_atomics.c b/src/glsl/nir/nir_lower_atomics.c index 46e137652a1..40ca3de96cf 100644 --- a/src/glsl/nir/nir_lower_atomics.c +++ b/src/glsl/nir/nir_lower_atomics.c @@ -25,17 +25,24 @@ * */ +#include "ir_uniform.h" #include "nir.h" #include "main/config.h" #include +typedef struct { + const struct gl_shader_program *shader_program; + nir_shader *shader; +} lower_atomic_state; + /* * replace atomic counter intrinsics that use a variable with intrinsics * that directly store the buffer index and byte offset */ static void -lower_instr(nir_intrinsic_instr *instr, nir_function_impl *impl) +lower_instr(nir_intrinsic_instr *instr, + lower_atomic_state *state) { nir_intrinsic_op op; switch (instr->intrinsic) { @@ -60,10 +67,11 @@ lower_instr(nir_intrinsic_instr *instr, nir_function_impl *impl) return; /* atomics passed as function arguments can't be lowered */ void *mem_ctx = ralloc_parent(instr); + unsigned uniform_loc = instr->variables[0]->var->data.location; nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(mem_ctx, op); new_instr->const_index[0] = - (int) instr->variables[0]->var->data.atomic.buffer_index; + state->shader_program->UniformStorage[uniform_loc].opaque[state->shader->stage].index; nir_load_const_instr *offset_const = nir_load_const_instr_create(mem_ctx, 1); offset_const->value.u[0] = instr->variables[0]->var->data.atomic.offset; @@ -132,18 +140,25 @@ lower_block(nir_block *block, void *state) { nir_foreach_instr_safe(block, instr) { if (instr->type == nir_instr_type_intrinsic) - lower_instr(nir_instr_as_intrinsic(instr), state); + lower_instr(nir_instr_as_intrinsic(instr), + (lower_atomic_state *) state); } return true; } void -nir_lower_atomics(nir_shader *shader) +nir_lower_atomics(nir_shader *shader, + const struct gl_shader_program *shader_program) { + lower_atomic_state state = { + .shader = shader, + .shader_program = shader_program, + }; + nir_foreach_overload(shader, overload) { if (overload->impl) { - nir_foreach_block(overload->impl, lower_block, overload->impl); + nir_foreach_block(overload->impl, lower_block, (void *) &state); nir_metadata_preserve(overload->impl, nir_metadata_block_index | nir_metadata_dominance); } diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 0fdc83ef7e1..18c361ea8cd 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -1463,7 +1463,7 @@ void brw_upload_ubo_surfaces(struct brw_context *brw, struct brw_stage_prog_data *prog_data, bool dword_pitch); void brw_upload_abo_surfaces(struct brw_context *brw, - struct gl_shader_program *prog, + struct gl_shader *shader, struct brw_stage_state *stage_state, struct brw_stage_prog_data *prog_data); void brw_upload_image_surfaces(struct brw_context *brw, diff --git a/src/mesa/drivers/dri/i965/brw_gs_surface_state.c b/src/mesa/drivers/dri/i965/brw_gs_surface_state.c index 00125c0f405..76ed237d88a 100644 --- a/src/mesa/drivers/dri/i965/brw_gs_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_gs_surface_state.c @@ -105,8 +105,8 @@ brw_upload_gs_abo_surfaces(struct brw_context *brw) if (prog) { /* BRW_NEW_GS_PROG_DATA */ - brw_upload_abo_surfaces(brw, prog, &brw->gs.base, - &brw->gs.prog_data->base.base); + brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_GEOMETRY], + &brw->gs.base, &brw->gs.prog_data->base.base); } } diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c index 1b4dace84fb..11f111382f4 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.c +++ b/src/mesa/drivers/dri/i965/brw_nir.c @@ -249,8 +249,10 @@ brw_create_nir(struct brw_context *brw, nir_lower_system_values(nir); nir_validate_shader(nir); - nir_lower_atomics(nir); - nir_validate_shader(nir); + if (shader_prog) { + nir_lower_atomics(nir, shader_prog); + nir_validate_shader(nir); + } nir_optimize(nir, is_scalar); diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 204935641f3..4ea297ade4c 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -1191,9 +1191,9 @@ brw_assign_common_binding_table_offsets(gl_shader_stage stage, stage_prog_data->binding_table.gather_texture_start = 0xd0d0d0d0; } - if (shader_prog && shader_prog->NumAtomicBuffers) { + if (shader && shader->NumAtomicBuffers) { stage_prog_data->binding_table.abo_start = next_binding_table_offset; - next_binding_table_offset += shader_prog->NumAtomicBuffers; + next_binding_table_offset += shader->NumAtomicBuffers; } else { stage_prog_data->binding_table.abo_start = 0xd0d0d0d0; } diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c index f65258a52a5..d7473845c72 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c @@ -177,8 +177,8 @@ brw_upload_vs_abo_surfaces(struct brw_context *brw) if (prog) { /* BRW_NEW_VS_PROG_DATA */ - brw_upload_abo_surfaces(brw, prog, &brw->vs.base, - &brw->vs.prog_data->base.base); + brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_VERTEX], + &brw->vs.base, &brw->vs.prog_data->base.base); } } diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 6ebe6481c32..f88f8d59196 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -1029,7 +1029,7 @@ const struct brw_tracked_state brw_cs_ubo_surfaces = { void brw_upload_abo_surfaces(struct brw_context *brw, - struct gl_shader_program *prog, + struct gl_shader *shader, struct brw_stage_state *stage_state, struct brw_stage_prog_data *prog_data) { @@ -1037,21 +1037,22 @@ brw_upload_abo_surfaces(struct brw_context *brw, uint32_t *surf_offsets = &stage_state->surf_offset[prog_data->binding_table.abo_start]; - for (unsigned i = 0; i < prog->NumAtomicBuffers; i++) { - struct gl_atomic_buffer_binding *binding = - &ctx->AtomicBufferBindings[prog->AtomicBuffers[i].Binding]; - struct intel_buffer_object *intel_bo = - intel_buffer_object(binding->BufferObject); - drm_intel_bo *bo = intel_bufferobj_buffer( - brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset); + if (shader && shader->NumAtomicBuffers) { + for (unsigned i = 0; i < shader->NumAtomicBuffers; i++) { + struct gl_atomic_buffer_binding *binding = + &ctx->AtomicBufferBindings[shader->AtomicBuffers[i]->Binding]; + struct intel_buffer_object *intel_bo = + intel_buffer_object(binding->BufferObject); + drm_intel_bo *bo = intel_bufferobj_buffer( + brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset); - brw->vtbl.emit_buffer_surface_state(brw, &surf_offsets[i], bo, - binding->Offset, BRW_SURFACEFORMAT_RAW, - bo->size - binding->Offset, 1, true); - } + brw->vtbl.emit_buffer_surface_state(brw, &surf_offsets[i], bo, + binding->Offset, BRW_SURFACEFORMAT_RAW, + bo->size - binding->Offset, 1, true); + } - if (prog->NumAtomicBuffers) brw->ctx.NewDriverState |= BRW_NEW_SURFACES; + } } static void @@ -1063,8 +1064,8 @@ brw_upload_wm_abo_surfaces(struct brw_context *brw) if (prog) { /* BRW_NEW_FS_PROG_DATA */ - brw_upload_abo_surfaces(brw, prog, &brw->wm.base, - &brw->wm.prog_data->base); + brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT], + &brw->wm.base, &brw->wm.prog_data->base); } } @@ -1088,8 +1089,8 @@ brw_upload_cs_abo_surfaces(struct brw_context *brw) if (prog) { /* BRW_NEW_CS_PROG_DATA */ - brw_upload_abo_surfaces(brw, prog, &brw->cs.base, - &brw->cs.prog_data->base); + brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE], + &brw->cs.base, &brw->cs.prog_data->base); } } diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 20dd70ef734..34120cf5777 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -2389,6 +2389,9 @@ struct gl_shader */ GLuint NumImages; + struct gl_active_atomic_buffer **AtomicBuffers; + unsigned NumAtomicBuffers; + /** * Whether early fragment tests are enabled as defined by * ARB_shader_image_load_store. @@ -4496,7 +4499,7 @@ static inline bool _mesa_active_fragment_shader_has_atomic_ops(const struct gl_context *ctx) { return ctx->Shader._CurrentFragmentProgram != NULL && - ctx->Shader._CurrentFragmentProgram->NumAtomicBuffers > 0; + ctx->Shader._CurrentFragmentProgram->_LinkedShaders[MESA_SHADER_FRAGMENT]->NumAtomicBuffers > 0; } #ifdef __cplusplus