gallium: add pipe_grid_info::last_block
The OpenMAX state tracker will use this. RadeonSI is adapted to use pipe_grid_info::last_block instead of its internal state.

Acked-by: Leo Liu <leo.liu@amd.com>
This commit is contained in:
parent
34b3b92bbe
commit
b9e02fe138
|
@ -341,6 +341,9 @@ u_pipe_screen_get_param_defaults(struct pipe_screen *pscreen,
|
|||
case PIPE_CAP_MAX_VARYINGS:
|
||||
return 8;
|
||||
|
||||
case PIPE_CAP_COMPUTE_GRID_INFO_LAST_BLOCK:
|
||||
return 0;
|
||||
|
||||
default:
|
||||
unreachable("bad PIPE_CAP_*");
|
||||
}
|
||||
|
|
|
@ -492,6 +492,8 @@ The integer capabilities:
|
|||
varyings. This will generally correspond to
|
||||
``PIPE_SHADER_CAP_MAX_INPUTS`` for the fragment shader, but in some
|
||||
cases may be a smaller number.
|
||||
* ``PIPE_CAP_COMPUTE_GRID_INFO_LAST_BLOCK``: Whether pipe_grid_info::last_block
|
||||
is implemented by the driver. See struct pipe_grid_info for more details.
|
||||
|
||||
.. _pipe_capf:
|
||||
|
||||
|
|
|
@ -804,7 +804,7 @@ static void si_emit_dispatch_packets(struct si_context *sctx,
|
|||
* allow launching waves out-of-order. (same as Vulkan) */
|
||||
S_00B800_ORDER_MODE(sctx->chip_class >= CIK);
|
||||
|
||||
uint *last_block = sctx->compute_last_block;
|
||||
uint *last_block = info->last_block;
|
||||
bool partial_block_en = last_block[0] || last_block[1] || last_block[2];
|
||||
|
||||
radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3);
|
||||
|
|
|
@ -381,7 +381,7 @@ void si_compute_copy_image(struct si_context *sctx,
|
|||
si_create_copy_image_compute_shader_1d_array(ctx);
|
||||
ctx->bind_compute_state(ctx, sctx->cs_copy_image_1d_array);
|
||||
info.block[0] = 64;
|
||||
sctx->compute_last_block[0] = width % 64;
|
||||
info.last_block[0] = width % 64;
|
||||
info.block[1] = 1;
|
||||
info.block[2] = 1;
|
||||
info.grid[0] = DIV_ROUND_UP(width, 64);
|
||||
|
@ -392,9 +392,9 @@ void si_compute_copy_image(struct si_context *sctx,
|
|||
sctx->cs_copy_image = si_create_copy_image_compute_shader(ctx);
|
||||
ctx->bind_compute_state(ctx, sctx->cs_copy_image);
|
||||
info.block[0] = 8;
|
||||
sctx->compute_last_block[0] = width % 8;
|
||||
info.last_block[0] = width % 8;
|
||||
info.block[1] = 8;
|
||||
sctx->compute_last_block[1] = height % 8;
|
||||
info.last_block[1] = height % 8;
|
||||
info.block[2] = 1;
|
||||
info.grid[0] = DIV_ROUND_UP(width, 8);
|
||||
info.grid[1] = DIV_ROUND_UP(height, 8);
|
||||
|
@ -403,9 +403,6 @@ void si_compute_copy_image(struct si_context *sctx,
|
|||
|
||||
ctx->launch_grid(ctx, &info);
|
||||
|
||||
sctx->compute_last_block[0] = 0;
|
||||
sctx->compute_last_block[1] = 0;
|
||||
|
||||
sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH |
|
||||
(sctx->chip_class <= VI ? SI_CONTEXT_WRITEBACK_GLOBAL_L2 : 0) |
|
||||
si_get_flush_flags(sctx, SI_COHERENCY_SHADER, L2_STREAM);
|
||||
|
@ -483,9 +480,9 @@ void si_compute_clear_render_target(struct pipe_context *ctx,
|
|||
sctx->cs_clear_render_target = si_clear_render_target_shader(ctx);
|
||||
ctx->bind_compute_state(ctx, sctx->cs_clear_render_target);
|
||||
info.block[0] = 8;
|
||||
sctx->compute_last_block[0] = width % 8;
|
||||
info.last_block[0] = width % 8;
|
||||
info.block[1] = 8;
|
||||
sctx->compute_last_block[1] = height % 8;
|
||||
info.last_block[1] = height % 8;
|
||||
info.block[2] = 1;
|
||||
info.grid[0] = DIV_ROUND_UP(width, 8);
|
||||
info.grid[1] = DIV_ROUND_UP(height, 8);
|
||||
|
@ -496,7 +493,7 @@ void si_compute_clear_render_target(struct pipe_context *ctx,
|
|||
si_clear_render_target_shader_1d_array(ctx);
|
||||
ctx->bind_compute_state(ctx, sctx->cs_clear_render_target_1d_array);
|
||||
info.block[0] = 64;
|
||||
sctx->compute_last_block[0] = width % 64;
|
||||
info.last_block[0] = width % 64;
|
||||
info.block[1] = 1;
|
||||
info.block[2] = 1;
|
||||
info.grid[0] = DIV_ROUND_UP(width, 64);
|
||||
|
@ -506,9 +503,6 @@ void si_compute_clear_render_target(struct pipe_context *ctx,
|
|||
|
||||
ctx->launch_grid(ctx, &info);
|
||||
|
||||
sctx->compute_last_block[0] = 0;
|
||||
sctx->compute_last_block[1] = 0;
|
||||
|
||||
sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH |
|
||||
(sctx->chip_class <= VI ? SI_CONTEXT_WRITEBACK_GLOBAL_L2 : 0) |
|
||||
si_get_flush_flags(sctx, SI_COHERENCY_SHADER, L2_STREAM);
|
||||
|
|
|
@ -160,6 +160,7 @@ static int si_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_TGSI_BALLOT:
|
||||
case PIPE_CAP_TGSI_VOTE:
|
||||
case PIPE_CAP_TGSI_FS_FBFETCH:
|
||||
case PIPE_CAP_COMPUTE_GRID_INFO_LAST_BLOCK:
|
||||
return 1;
|
||||
|
||||
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
|
||||
|
|
|
@ -921,28 +921,6 @@ struct si_context {
|
|||
uint32_t vs_blit_sh_data[SI_VS_BLIT_SGPRS_POS_TEXCOORD];
|
||||
uint32_t cs_user_data[4];
|
||||
|
||||
/**
|
||||
* last_block allows disabling threads at the farthermost grid boundary.
|
||||
* Full blocks as specified by "block" are launched, but the threads
|
||||
* outside of "last_block" dimensions are disabled.
|
||||
*
|
||||
* If a block touches the grid boundary in the i-th axis, threads with
|
||||
* THREAD_ID[i] >= last_block[i] are disabled.
|
||||
*
|
||||
* If last_block[i] is 0, it has the same behavior as last_block[i] = block[i],
|
||||
* meaning no effect.
|
||||
*
|
||||
* It's equivalent to doing this at the beginning of the compute shader:
|
||||
*
|
||||
* for (i = 0; i < 3; i++) {
|
||||
* if (block_id[i] == grid[i] - 1 &&
|
||||
* last_block[i] && thread_id[i] >= last_block[i])
|
||||
* return;
|
||||
* }
|
||||
* (this could be moved into pipe_grid_info)
|
||||
*/
|
||||
uint compute_last_block[3];
|
||||
|
||||
/* Vertex and index buffers. */
|
||||
bool vertex_buffers_dirty;
|
||||
bool vertex_buffer_pointer_dirty;
|
||||
|
|
|
@ -858,6 +858,7 @@ enum pipe_cap
|
|||
PIPE_CAP_DEST_SURFACE_SRGB_CONTROL,
|
||||
PIPE_CAP_NIR_COMPACT_ARRAYS,
|
||||
PIPE_CAP_MAX_VARYINGS,
|
||||
PIPE_CAP_COMPUTE_GRID_INFO_LAST_BLOCK,
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
|
@ -838,6 +838,27 @@ struct pipe_grid_info
|
|||
*/
|
||||
uint block[3];
|
||||
|
||||
/**
|
||||
* last_block allows disabling threads at the farthermost grid boundary.
|
||||
* Full blocks as specified by "block" are launched, but the threads
|
||||
* outside of "last_block" dimensions are disabled.
|
||||
*
|
||||
* If a block touches the grid boundary in the i-th axis, threads with
|
||||
* THREAD_ID[i] >= last_block[i] are disabled.
|
||||
*
|
||||
* If last_block[i] is 0, it has the same behavior as last_block[i] = block[i],
|
||||
* meaning no effect.
|
||||
*
|
||||
* It's equivalent to doing this at the beginning of the compute shader:
|
||||
*
|
||||
* for (i = 0; i < 3; i++) {
|
||||
* if (block_id[i] == grid[i] - 1 &&
|
||||
* last_block[i] && thread_id[i] >= last_block[i])
|
||||
* return;
|
||||
* }
|
||||
*/
|
||||
uint last_block[3];
|
||||
|
||||
/**
|
||||
* Determine the layout of the grid (in block units) to be used.
|
||||
*/
|
||||
|
|