i965/cs: Set up surface binding for gl_NumWorkGroups
This will only be set up when the prog_data uses_num_work_groups boolean is set. At this point nothing will set uses_num_work_groups, but soon code will set it when emitting code for the intrinsic that loads gl_NumWorkGroups. We can't emit this surface information earlier at the start of the DispatchCompute* call because we may not have generated the program yet. Until we generate the program, we don't know if the gl_NumWorkGroups variable is accessed. We also can't emit the surface as part of the brw_cs_state atom, because we might not need the surface if gl_NumWorkGroups is not used by the program. Lastly, we cannot emit the surface later (after state upload) in the DispatchCompute* call, because it needs to be run before the brw_cs_state atom is emitted, since it changes the surface state. Signed-off-by: Jordan Justen <jordan.l.justen@intel.com> Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
This commit is contained in:
parent
d1be9d2126
commit
63d7b33f51
|
@ -184,6 +184,7 @@ brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups) {
|
|||
|
||||
brw->compute.num_work_groups_bo = NULL;
|
||||
brw->compute.num_work_groups = num_groups;
|
||||
ctx->NewDriverState |= BRW_NEW_CS_WORK_GROUPS;
|
||||
|
||||
brw_dispatch_compute_common(ctx);
|
||||
}
|
||||
|
@ -202,6 +203,7 @@ brw_dispatch_compute_indirect(struct gl_context *ctx, GLintptr indirect)
|
|||
brw->compute.num_work_groups_bo = bo;
|
||||
brw->compute.num_work_groups_offset = indirect;
|
||||
brw->compute.num_work_groups = indirect_group_counts;
|
||||
ctx->NewDriverState |= BRW_NEW_CS_WORK_GROUPS;
|
||||
|
||||
brw_dispatch_compute_common(ctx);
|
||||
}
|
||||
|
|
|
@ -213,6 +213,7 @@ enum brw_state_id {
|
|||
BRW_STATE_SAMPLER_STATE_TABLE,
|
||||
BRW_STATE_VS_ATTRIB_WORKAROUNDS,
|
||||
BRW_STATE_COMPUTE_PROGRAM,
|
||||
BRW_STATE_CS_WORK_GROUPS,
|
||||
BRW_NUM_STATE_BITS
|
||||
};
|
||||
|
||||
|
@ -294,6 +295,7 @@ enum brw_state_id {
|
|||
#define BRW_NEW_SAMPLER_STATE_TABLE (1ull << BRW_STATE_SAMPLER_STATE_TABLE)
|
||||
#define BRW_NEW_VS_ATTRIB_WORKAROUNDS (1ull << BRW_STATE_VS_ATTRIB_WORKAROUNDS)
|
||||
#define BRW_NEW_COMPUTE_PROGRAM (1ull << BRW_STATE_COMPUTE_PROGRAM)
|
||||
#define BRW_NEW_CS_WORK_GROUPS (1ull << BRW_STATE_CS_WORK_GROUPS)
|
||||
|
||||
struct brw_state_flags {
|
||||
/** State update flags signalled by mesa internals */
|
||||
|
@ -497,6 +499,7 @@ struct brw_cs_prog_data {
|
|||
unsigned local_size[3];
|
||||
unsigned simd_size;
|
||||
bool uses_barrier;
|
||||
bool uses_num_work_groups;
|
||||
|
||||
struct {
|
||||
/** @{
|
||||
|
@ -1545,7 +1548,7 @@ struct brw_context
|
|||
|
||||
int num_atoms[BRW_NUM_PIPELINES];
|
||||
const struct brw_tracked_state render_atoms[60];
|
||||
const struct brw_tracked_state compute_atoms[6];
|
||||
const struct brw_tracked_state compute_atoms[7];
|
||||
|
||||
/* If (INTEL_DEBUG & DEBUG_BATCH) */
|
||||
struct {
|
||||
|
|
|
@ -158,6 +158,7 @@ extern const struct brw_tracked_state gen8_sf_clip_viewport;
|
|||
extern const struct brw_tracked_state gen8_vertices;
|
||||
extern const struct brw_tracked_state gen8_vf_topology;
|
||||
extern const struct brw_tracked_state gen8_vs_state;
|
||||
extern const struct brw_tracked_state brw_cs_work_groups_surface;
|
||||
|
||||
static inline bool
|
||||
brw_state_dirty(struct brw_context *brw, GLuint mesa_flags, uint64_t brw_flags)
|
||||
|
|
|
@ -261,6 +261,7 @@ static const struct brw_tracked_state *gen7_compute_atoms[] =
|
|||
&gen7_cs_push_constants,
|
||||
&brw_cs_abo_surfaces,
|
||||
&brw_texture_surfaces,
|
||||
&brw_cs_work_groups_surface,
|
||||
&brw_cs_state,
|
||||
};
|
||||
|
||||
|
@ -353,6 +354,7 @@ static const struct brw_tracked_state *gen8_compute_atoms[] =
|
|||
&gen7_cs_push_constants,
|
||||
&brw_cs_abo_surfaces,
|
||||
&brw_texture_surfaces,
|
||||
&brw_cs_work_groups_surface,
|
||||
&brw_cs_state,
|
||||
};
|
||||
|
||||
|
@ -613,6 +615,7 @@ static struct dirty_bit_map brw_bits[] = {
|
|||
DEFINE_BIT(BRW_NEW_SAMPLER_STATE_TABLE),
|
||||
DEFINE_BIT(BRW_NEW_VS_ATTRIB_WORKAROUNDS),
|
||||
DEFINE_BIT(BRW_NEW_COMPUTE_PROGRAM),
|
||||
DEFINE_BIT(BRW_NEW_CS_WORK_GROUPS),
|
||||
{0, 0, 0}
|
||||
};
|
||||
|
||||
|
|
|
@ -1336,3 +1336,46 @@ gen4_init_vtable_surface_functions(struct brw_context *brw)
|
|||
brw->vtbl.emit_null_surface_state = brw_emit_null_surface_state;
|
||||
brw->vtbl.emit_buffer_surface_state = gen4_emit_buffer_surface_state;
|
||||
}
|
||||
|
||||
static void
|
||||
brw_upload_cs_work_groups_surface(struct brw_context *brw)
|
||||
{
|
||||
struct gl_context *ctx = &brw->ctx;
|
||||
/* _NEW_PROGRAM */
|
||||
struct gl_shader_program *prog =
|
||||
ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
|
||||
|
||||
if (prog && brw->cs.prog_data->uses_num_work_groups) {
|
||||
const unsigned surf_idx =
|
||||
brw->cs.prog_data->binding_table.work_groups_start;
|
||||
uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx];
|
||||
drm_intel_bo *bo;
|
||||
uint32_t bo_offset;
|
||||
|
||||
if (brw->compute.num_work_groups_bo == NULL) {
|
||||
bo = NULL;
|
||||
intel_upload_data(brw,
|
||||
(void *)brw->compute.num_work_groups,
|
||||
3 * sizeof(GLuint),
|
||||
sizeof(GLuint),
|
||||
&bo,
|
||||
&bo_offset);
|
||||
} else {
|
||||
bo = brw->compute.num_work_groups_bo;
|
||||
bo_offset = brw->compute.num_work_groups_offset;
|
||||
}
|
||||
|
||||
brw->vtbl.emit_buffer_surface_state(brw, surf_offset,
|
||||
bo, bo_offset,
|
||||
BRW_SURFACEFORMAT_RAW,
|
||||
3 * sizeof(GLuint), 1, true);
|
||||
brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
|
||||
}
|
||||
}
|
||||
|
||||
const struct brw_tracked_state brw_cs_work_groups_surface = {
|
||||
.dirty = {
|
||||
.brw = BRW_NEW_CS_WORK_GROUPS
|
||||
},
|
||||
.emit = brw_upload_cs_work_groups_surface,
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue