diff --git a/src/mesa/drivers/dri/i965/brw_compute.c b/src/mesa/drivers/dri/i965/brw_compute.c index 505023d2851..fe991a46153 100644 --- a/src/mesa/drivers/dri/i965/brw_compute.c +++ b/src/mesa/drivers/dri/i965/brw_compute.c @@ -184,6 +184,7 @@ brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups) { brw->compute.num_work_groups_bo = NULL; brw->compute.num_work_groups = num_groups; + ctx->NewDriverState |= BRW_NEW_CS_WORK_GROUPS; brw_dispatch_compute_common(ctx); } @@ -202,6 +203,7 @@ brw_dispatch_compute_indirect(struct gl_context *ctx, GLintptr indirect) brw->compute.num_work_groups_bo = bo; brw->compute.num_work_groups_offset = indirect; brw->compute.num_work_groups = indirect_group_counts; + ctx->NewDriverState |= BRW_NEW_CS_WORK_GROUPS; brw_dispatch_compute_common(ctx); } diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index e407dec8eab..8b790fe0bca 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -213,6 +213,7 @@ enum brw_state_id { BRW_STATE_SAMPLER_STATE_TABLE, BRW_STATE_VS_ATTRIB_WORKAROUNDS, BRW_STATE_COMPUTE_PROGRAM, + BRW_STATE_CS_WORK_GROUPS, BRW_NUM_STATE_BITS }; @@ -294,6 +295,7 @@ enum brw_state_id { #define BRW_NEW_SAMPLER_STATE_TABLE (1ull << BRW_STATE_SAMPLER_STATE_TABLE) #define BRW_NEW_VS_ATTRIB_WORKAROUNDS (1ull << BRW_STATE_VS_ATTRIB_WORKAROUNDS) #define BRW_NEW_COMPUTE_PROGRAM (1ull << BRW_STATE_COMPUTE_PROGRAM) +#define BRW_NEW_CS_WORK_GROUPS (1ull << BRW_STATE_CS_WORK_GROUPS) struct brw_state_flags { /** State update flags signalled by mesa internals */ @@ -497,6 +499,7 @@ struct brw_cs_prog_data { unsigned local_size[3]; unsigned simd_size; bool uses_barrier; + bool uses_num_work_groups; struct { /** @{ @@ -1545,7 +1548,7 @@ struct brw_context int num_atoms[BRW_NUM_PIPELINES]; const struct brw_tracked_state render_atoms[60]; - const struct brw_tracked_state compute_atoms[6]; + const struct brw_tracked_state compute_atoms[7]; 
/* If (INTEL_DEBUG & DEBUG_BATCH) */ struct { diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index afce8ad3b27..3b7a4330c7a 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -158,6 +158,7 @@ extern const struct brw_tracked_state gen8_sf_clip_viewport; extern const struct brw_tracked_state gen8_vertices; extern const struct brw_tracked_state gen8_vf_topology; extern const struct brw_tracked_state gen8_vs_state; +extern const struct brw_tracked_state brw_cs_work_groups_surface; static inline bool brw_state_dirty(struct brw_context *brw, GLuint mesa_flags, uint64_t brw_flags) diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 2e8a0b3de9b..46687e342d3 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -261,6 +261,7 @@ static const struct brw_tracked_state *gen7_compute_atoms[] = &gen7_cs_push_constants, &brw_cs_abo_surfaces, &brw_texture_surfaces, + &brw_cs_work_groups_surface, &brw_cs_state, }; @@ -353,6 +354,7 @@ static const struct brw_tracked_state *gen8_compute_atoms[] = &gen7_cs_push_constants, &brw_cs_abo_surfaces, &brw_texture_surfaces, + &brw_cs_work_groups_surface, &brw_cs_state, }; @@ -613,6 +615,7 @@ static struct dirty_bit_map brw_bits[] = { DEFINE_BIT(BRW_NEW_SAMPLER_STATE_TABLE), DEFINE_BIT(BRW_NEW_VS_ATTRIB_WORKAROUNDS), DEFINE_BIT(BRW_NEW_COMPUTE_PROGRAM), + DEFINE_BIT(BRW_NEW_CS_WORK_GROUPS), {0, 0, 0} }; diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index f2aaa0b178e..c9316963840 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -1336,3 +1336,46 @@ gen4_init_vtable_surface_functions(struct brw_context *brw) brw->vtbl.emit_null_surface_state = brw_emit_null_surface_state; brw->vtbl.emit_buffer_surface_state 
= gen4_emit_buffer_surface_state; } + /* Emit a RAW buffer surface covering the three GLuint work-group counts, but only when a compute program is current and its prog_data sets uses_num_work_groups; otherwise do nothing. */ +static void +brw_upload_cs_work_groups_surface(struct brw_context *brw) +{ + struct gl_context *ctx = &brw->ctx; + /* _NEW_PROGRAM */ + struct gl_shader_program *prog = + ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE]; + + if (prog && brw->cs.prog_data->uses_num_work_groups) { + const unsigned surf_idx = + brw->cs.prog_data->binding_table.work_groups_start; + uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx]; /* binding-table slot this surface state is written to */ + drm_intel_bo *bo; + uint32_t bo_offset; + + if (brw->compute.num_work_groups_bo == NULL) { /* no buffer object recorded for the counts: copy the three GLuints at num_work_groups via intel_upload_data */ + bo = NULL; + intel_upload_data(brw, + (void *)brw->compute.num_work_groups, + 3 * sizeof(GLuint), + sizeof(GLuint), + &bo, + &bo_offset); /* NOTE(review): presumably fills bo/bo_offset; if the returned bo carries a reference, nothing in this function releases it - confirm whether an unreference is needed once the surface state is emitted */ + } else { /* counts already live in a buffer object: use it at the recorded offset */ + bo = brw->compute.num_work_groups_bo; + bo_offset = brw->compute.num_work_groups_offset; + } + + brw->vtbl.emit_buffer_surface_state(brw, surf_offset, + bo, bo_offset, + BRW_SURFACEFORMAT_RAW, + 3 * sizeof(GLuint), 1, true); + brw->ctx.NewDriverState |= BRW_NEW_SURFACES; /* the surf_offset entry was just rewritten, so raise BRW_NEW_SURFACES */ + } +} + /* Tracked-state atom that ties the BRW_NEW_CS_WORK_GROUPS dirty bit to brw_upload_cs_work_groups_surface as its emit hook. */ +const struct brw_tracked_state brw_cs_work_groups_surface = { + .dirty = { + .brw = BRW_NEW_CS_WORK_GROUPS + }, + .emit = brw_upload_cs_work_groups_surface, +};