glsl: keep track of intra-stage indices for atomics
This is more efficient, as it means we no longer have to upload the same set of ABO surfaces to all stages in the program. This also fixes a bug where, since commit c0cd5b, var->data.binding was being used as a replacement for the atomic buffer index; the two don't have to be the same value, they just happened to end up the same when binding is 0. Reviewed-by: Francisco Jerez <currojerez@riseup.net> Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com> Cc: Ilia Mirkin <imirkin@alum.mit.edu> Cc: Alejandro Piñeiro <apinheiro@igalia.com> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90175
This commit is contained in:
parent
711489648b
commit
a3d0359aff
|
@ -198,6 +198,7 @@ link_assign_atomic_counter_resources(struct gl_context *ctx,
|
||||||
struct gl_shader_program *prog)
|
struct gl_shader_program *prog)
|
||||||
{
|
{
|
||||||
unsigned num_buffers;
|
unsigned num_buffers;
|
||||||
|
unsigned num_atomic_buffers[MESA_SHADER_STAGES] = {};
|
||||||
active_atomic_buffer *abs =
|
active_atomic_buffer *abs =
|
||||||
find_active_atomic_counters(ctx, prog, &num_buffers);
|
find_active_atomic_counters(ctx, prog, &num_buffers);
|
||||||
|
|
||||||
|
@ -242,13 +243,49 @@ link_assign_atomic_counter_resources(struct gl_context *ctx,
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Assign stage-specific fields. */
|
/* Assign stage-specific fields. */
|
||||||
for (unsigned j = 0; j < MESA_SHADER_STAGES; ++j)
|
for (unsigned j = 0; j < MESA_SHADER_STAGES; ++j) {
|
||||||
mab.StageReferences[j] =
|
if (ab.stage_references[j]) {
|
||||||
(ab.stage_references[j] ? GL_TRUE : GL_FALSE);
|
mab.StageReferences[j] = GL_TRUE;
|
||||||
|
num_atomic_buffers[j]++;
|
||||||
|
} else {
|
||||||
|
mab.StageReferences[j] = GL_FALSE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Store a list of pointers to atomic buffers per stage and store the index
|
||||||
|
* to the intra-stage buffer list in uniform storage.
|
||||||
|
*/
|
||||||
|
for (unsigned j = 0; j < MESA_SHADER_STAGES; ++j) {
|
||||||
|
if (prog->_LinkedShaders[j] && num_atomic_buffers[j] > 0) {
|
||||||
|
prog->_LinkedShaders[j]->NumAtomicBuffers = num_atomic_buffers[j];
|
||||||
|
prog->_LinkedShaders[j]->AtomicBuffers =
|
||||||
|
rzalloc_array(prog, gl_active_atomic_buffer *,
|
||||||
|
num_atomic_buffers[j]);
|
||||||
|
|
||||||
|
unsigned intra_stage_idx = 0;
|
||||||
|
for (unsigned i = 0; i < num_buffers; i++) {
|
||||||
|
struct gl_active_atomic_buffer *atomic_buffer =
|
||||||
|
&prog->AtomicBuffers[i];
|
||||||
|
if (atomic_buffer->StageReferences[j]) {
|
||||||
|
prog->_LinkedShaders[j]->AtomicBuffers[intra_stage_idx] =
|
||||||
|
atomic_buffer;
|
||||||
|
|
||||||
|
for (unsigned u = 0; u < atomic_buffer->NumUniforms; u++) {
|
||||||
|
prog->UniformStorage[atomic_buffer->Uniforms[u]].opaque[j].index =
|
||||||
|
intra_stage_idx;
|
||||||
|
prog->UniformStorage[atomic_buffer->Uniforms[u]].opaque[j].active =
|
||||||
|
true;
|
||||||
|
}
|
||||||
|
|
||||||
|
intra_stage_idx++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
delete [] abs;
|
delete [] abs;
|
||||||
assert(i == num_buffers);
|
assert(i == num_buffers);
|
||||||
}
|
}
|
||||||
|
|
|
@ -392,8 +392,6 @@ nir_visitor::visit(ir_variable *ir)
|
||||||
|
|
||||||
var->data.index = ir->data.index;
|
var->data.index = ir->data.index;
|
||||||
var->data.binding = ir->data.binding;
|
var->data.binding = ir->data.binding;
|
||||||
/* XXX Get rid of buffer_index */
|
|
||||||
var->data.atomic.buffer_index = ir->data.binding;
|
|
||||||
var->data.atomic.offset = ir->data.atomic.offset;
|
var->data.atomic.offset = ir->data.atomic.offset;
|
||||||
var->data.image.read_only = ir->data.image_read_only;
|
var->data.image.read_only = ir->data.image_read_only;
|
||||||
var->data.image.write_only = ir->data.image_write_only;
|
var->data.image.write_only = ir->data.image_write_only;
|
||||||
|
|
|
@ -308,7 +308,6 @@ typedef struct {
|
||||||
* Location an atomic counter is stored at.
|
* Location an atomic counter is stored at.
|
||||||
*/
|
*/
|
||||||
struct {
|
struct {
|
||||||
unsigned buffer_index;
|
|
||||||
unsigned offset;
|
unsigned offset;
|
||||||
} atomic;
|
} atomic;
|
||||||
|
|
||||||
|
@ -1978,7 +1977,8 @@ void nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables);
|
||||||
|
|
||||||
void nir_lower_two_sided_color(nir_shader *shader);
|
void nir_lower_two_sided_color(nir_shader *shader);
|
||||||
|
|
||||||
void nir_lower_atomics(nir_shader *shader);
|
void nir_lower_atomics(nir_shader *shader,
|
||||||
|
const struct gl_shader_program *shader_program);
|
||||||
void nir_lower_to_source_mods(nir_shader *shader);
|
void nir_lower_to_source_mods(nir_shader *shader);
|
||||||
|
|
||||||
bool nir_lower_gs_intrinsics(nir_shader *shader);
|
bool nir_lower_gs_intrinsics(nir_shader *shader);
|
||||||
|
|
|
@ -25,17 +25,24 @@
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include "ir_uniform.h"
|
||||||
#include "nir.h"
|
#include "nir.h"
|
||||||
#include "main/config.h"
|
#include "main/config.h"
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
const struct gl_shader_program *shader_program;
|
||||||
|
nir_shader *shader;
|
||||||
|
} lower_atomic_state;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* replace atomic counter intrinsics that use a variable with intrinsics
|
* replace atomic counter intrinsics that use a variable with intrinsics
|
||||||
* that directly store the buffer index and byte offset
|
* that directly store the buffer index and byte offset
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static void
|
static void
|
||||||
lower_instr(nir_intrinsic_instr *instr, nir_function_impl *impl)
|
lower_instr(nir_intrinsic_instr *instr,
|
||||||
|
lower_atomic_state *state)
|
||||||
{
|
{
|
||||||
nir_intrinsic_op op;
|
nir_intrinsic_op op;
|
||||||
switch (instr->intrinsic) {
|
switch (instr->intrinsic) {
|
||||||
|
@ -60,10 +67,11 @@ lower_instr(nir_intrinsic_instr *instr, nir_function_impl *impl)
|
||||||
return; /* atomics passed as function arguments can't be lowered */
|
return; /* atomics passed as function arguments can't be lowered */
|
||||||
|
|
||||||
void *mem_ctx = ralloc_parent(instr);
|
void *mem_ctx = ralloc_parent(instr);
|
||||||
|
unsigned uniform_loc = instr->variables[0]->var->data.location;
|
||||||
|
|
||||||
nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(mem_ctx, op);
|
nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(mem_ctx, op);
|
||||||
new_instr->const_index[0] =
|
new_instr->const_index[0] =
|
||||||
(int) instr->variables[0]->var->data.atomic.buffer_index;
|
state->shader_program->UniformStorage[uniform_loc].opaque[state->shader->stage].index;
|
||||||
|
|
||||||
nir_load_const_instr *offset_const = nir_load_const_instr_create(mem_ctx, 1);
|
nir_load_const_instr *offset_const = nir_load_const_instr_create(mem_ctx, 1);
|
||||||
offset_const->value.u[0] = instr->variables[0]->var->data.atomic.offset;
|
offset_const->value.u[0] = instr->variables[0]->var->data.atomic.offset;
|
||||||
|
@ -132,18 +140,25 @@ lower_block(nir_block *block, void *state)
|
||||||
{
|
{
|
||||||
nir_foreach_instr_safe(block, instr) {
|
nir_foreach_instr_safe(block, instr) {
|
||||||
if (instr->type == nir_instr_type_intrinsic)
|
if (instr->type == nir_instr_type_intrinsic)
|
||||||
lower_instr(nir_instr_as_intrinsic(instr), state);
|
lower_instr(nir_instr_as_intrinsic(instr),
|
||||||
|
(lower_atomic_state *) state);
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
nir_lower_atomics(nir_shader *shader)
|
nir_lower_atomics(nir_shader *shader,
|
||||||
|
const struct gl_shader_program *shader_program)
|
||||||
{
|
{
|
||||||
|
lower_atomic_state state = {
|
||||||
|
.shader = shader,
|
||||||
|
.shader_program = shader_program,
|
||||||
|
};
|
||||||
|
|
||||||
nir_foreach_overload(shader, overload) {
|
nir_foreach_overload(shader, overload) {
|
||||||
if (overload->impl) {
|
if (overload->impl) {
|
||||||
nir_foreach_block(overload->impl, lower_block, overload->impl);
|
nir_foreach_block(overload->impl, lower_block, (void *) &state);
|
||||||
nir_metadata_preserve(overload->impl, nir_metadata_block_index |
|
nir_metadata_preserve(overload->impl, nir_metadata_block_index |
|
||||||
nir_metadata_dominance);
|
nir_metadata_dominance);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1463,7 +1463,7 @@ void brw_upload_ubo_surfaces(struct brw_context *brw,
|
||||||
struct brw_stage_prog_data *prog_data,
|
struct brw_stage_prog_data *prog_data,
|
||||||
bool dword_pitch);
|
bool dword_pitch);
|
||||||
void brw_upload_abo_surfaces(struct brw_context *brw,
|
void brw_upload_abo_surfaces(struct brw_context *brw,
|
||||||
struct gl_shader_program *prog,
|
struct gl_shader *shader,
|
||||||
struct brw_stage_state *stage_state,
|
struct brw_stage_state *stage_state,
|
||||||
struct brw_stage_prog_data *prog_data);
|
struct brw_stage_prog_data *prog_data);
|
||||||
void brw_upload_image_surfaces(struct brw_context *brw,
|
void brw_upload_image_surfaces(struct brw_context *brw,
|
||||||
|
|
|
@ -105,8 +105,8 @@ brw_upload_gs_abo_surfaces(struct brw_context *brw)
|
||||||
|
|
||||||
if (prog) {
|
if (prog) {
|
||||||
/* BRW_NEW_GS_PROG_DATA */
|
/* BRW_NEW_GS_PROG_DATA */
|
||||||
brw_upload_abo_surfaces(brw, prog, &brw->gs.base,
|
brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_GEOMETRY],
|
||||||
&brw->gs.prog_data->base.base);
|
&brw->gs.base, &brw->gs.prog_data->base.base);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -249,8 +249,10 @@ brw_create_nir(struct brw_context *brw,
|
||||||
nir_lower_system_values(nir);
|
nir_lower_system_values(nir);
|
||||||
nir_validate_shader(nir);
|
nir_validate_shader(nir);
|
||||||
|
|
||||||
nir_lower_atomics(nir);
|
if (shader_prog) {
|
||||||
nir_validate_shader(nir);
|
nir_lower_atomics(nir, shader_prog);
|
||||||
|
nir_validate_shader(nir);
|
||||||
|
}
|
||||||
|
|
||||||
nir_optimize(nir, is_scalar);
|
nir_optimize(nir, is_scalar);
|
||||||
|
|
||||||
|
|
|
@ -1191,9 +1191,9 @@ brw_assign_common_binding_table_offsets(gl_shader_stage stage,
|
||||||
stage_prog_data->binding_table.gather_texture_start = 0xd0d0d0d0;
|
stage_prog_data->binding_table.gather_texture_start = 0xd0d0d0d0;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (shader_prog && shader_prog->NumAtomicBuffers) {
|
if (shader && shader->NumAtomicBuffers) {
|
||||||
stage_prog_data->binding_table.abo_start = next_binding_table_offset;
|
stage_prog_data->binding_table.abo_start = next_binding_table_offset;
|
||||||
next_binding_table_offset += shader_prog->NumAtomicBuffers;
|
next_binding_table_offset += shader->NumAtomicBuffers;
|
||||||
} else {
|
} else {
|
||||||
stage_prog_data->binding_table.abo_start = 0xd0d0d0d0;
|
stage_prog_data->binding_table.abo_start = 0xd0d0d0d0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -177,8 +177,8 @@ brw_upload_vs_abo_surfaces(struct brw_context *brw)
|
||||||
|
|
||||||
if (prog) {
|
if (prog) {
|
||||||
/* BRW_NEW_VS_PROG_DATA */
|
/* BRW_NEW_VS_PROG_DATA */
|
||||||
brw_upload_abo_surfaces(brw, prog, &brw->vs.base,
|
brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_VERTEX],
|
||||||
&brw->vs.prog_data->base.base);
|
&brw->vs.base, &brw->vs.prog_data->base.base);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1029,7 +1029,7 @@ const struct brw_tracked_state brw_cs_ubo_surfaces = {
|
||||||
|
|
||||||
void
|
void
|
||||||
brw_upload_abo_surfaces(struct brw_context *brw,
|
brw_upload_abo_surfaces(struct brw_context *brw,
|
||||||
struct gl_shader_program *prog,
|
struct gl_shader *shader,
|
||||||
struct brw_stage_state *stage_state,
|
struct brw_stage_state *stage_state,
|
||||||
struct brw_stage_prog_data *prog_data)
|
struct brw_stage_prog_data *prog_data)
|
||||||
{
|
{
|
||||||
|
@ -1037,21 +1037,22 @@ brw_upload_abo_surfaces(struct brw_context *brw,
|
||||||
uint32_t *surf_offsets =
|
uint32_t *surf_offsets =
|
||||||
&stage_state->surf_offset[prog_data->binding_table.abo_start];
|
&stage_state->surf_offset[prog_data->binding_table.abo_start];
|
||||||
|
|
||||||
for (unsigned i = 0; i < prog->NumAtomicBuffers; i++) {
|
if (shader && shader->NumAtomicBuffers) {
|
||||||
struct gl_atomic_buffer_binding *binding =
|
for (unsigned i = 0; i < shader->NumAtomicBuffers; i++) {
|
||||||
&ctx->AtomicBufferBindings[prog->AtomicBuffers[i].Binding];
|
struct gl_atomic_buffer_binding *binding =
|
||||||
struct intel_buffer_object *intel_bo =
|
&ctx->AtomicBufferBindings[shader->AtomicBuffers[i]->Binding];
|
||||||
intel_buffer_object(binding->BufferObject);
|
struct intel_buffer_object *intel_bo =
|
||||||
drm_intel_bo *bo = intel_bufferobj_buffer(
|
intel_buffer_object(binding->BufferObject);
|
||||||
brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);
|
drm_intel_bo *bo = intel_bufferobj_buffer(
|
||||||
|
brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);
|
||||||
|
|
||||||
brw->vtbl.emit_buffer_surface_state(brw, &surf_offsets[i], bo,
|
brw->vtbl.emit_buffer_surface_state(brw, &surf_offsets[i], bo,
|
||||||
binding->Offset, BRW_SURFACEFORMAT_RAW,
|
binding->Offset, BRW_SURFACEFORMAT_RAW,
|
||||||
bo->size - binding->Offset, 1, true);
|
bo->size - binding->Offset, 1, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (prog->NumAtomicBuffers)
|
|
||||||
brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
|
brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
@ -1063,8 +1064,8 @@ brw_upload_wm_abo_surfaces(struct brw_context *brw)
|
||||||
|
|
||||||
if (prog) {
|
if (prog) {
|
||||||
/* BRW_NEW_FS_PROG_DATA */
|
/* BRW_NEW_FS_PROG_DATA */
|
||||||
brw_upload_abo_surfaces(brw, prog, &brw->wm.base,
|
brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
|
||||||
&brw->wm.prog_data->base);
|
&brw->wm.base, &brw->wm.prog_data->base);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1088,8 +1089,8 @@ brw_upload_cs_abo_surfaces(struct brw_context *brw)
|
||||||
|
|
||||||
if (prog) {
|
if (prog) {
|
||||||
/* BRW_NEW_CS_PROG_DATA */
|
/* BRW_NEW_CS_PROG_DATA */
|
||||||
brw_upload_abo_surfaces(brw, prog, &brw->cs.base,
|
brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
|
||||||
&brw->cs.prog_data->base);
|
&brw->cs.base, &brw->cs.prog_data->base);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -2389,6 +2389,9 @@ struct gl_shader
|
||||||
*/
|
*/
|
||||||
GLuint NumImages;
|
GLuint NumImages;
|
||||||
|
|
||||||
|
struct gl_active_atomic_buffer **AtomicBuffers;
|
||||||
|
unsigned NumAtomicBuffers;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Whether early fragment tests are enabled as defined by
|
* Whether early fragment tests are enabled as defined by
|
||||||
* ARB_shader_image_load_store.
|
* ARB_shader_image_load_store.
|
||||||
|
@ -4496,7 +4499,7 @@ static inline bool
|
||||||
_mesa_active_fragment_shader_has_atomic_ops(const struct gl_context *ctx)
|
_mesa_active_fragment_shader_has_atomic_ops(const struct gl_context *ctx)
|
||||||
{
|
{
|
||||||
return ctx->Shader._CurrentFragmentProgram != NULL &&
|
return ctx->Shader._CurrentFragmentProgram != NULL &&
|
||||||
ctx->Shader._CurrentFragmentProgram->NumAtomicBuffers > 0;
|
ctx->Shader._CurrentFragmentProgram->_LinkedShaders[MESA_SHADER_FRAGMENT]->NumAtomicBuffers > 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
|
Loading…
Reference in New Issue