radeonsi: merge the copy_image shader generators

Use a unified si_create_copy_image_cs() to generate both the 1D and 2D
copy shaders; the shaders themselves remain separate.

Also:
- add a helper deref_ssa() for NIR variable derefs
- add a helper set_work_size() for setting workgroup and grid sizes
- pass si_context (instead of pipe_context) to the copy_image generator

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15268>
Mihai Preda 2022-03-21 07:37:08 +02:00 committed by Marge Bot
parent c0ef40bbce
commit 08f74e7185
4 changed files with 85 additions and 117 deletions

src/gallium/drivers/radeonsi/si_compute_blit.c

@@ -475,6 +475,21 @@ void si_copy_buffer(struct si_context *sctx, struct pipe_resource *dst, struct p
}
}
static void
set_work_size(struct pipe_grid_info *info, unsigned block_x, unsigned block_y, unsigned block_z,
unsigned work_x, unsigned work_y, unsigned work_z)
{
info->block[0] = block_x;
info->block[1] = block_y;
info->block[2] = block_z;
unsigned work[3] = {work_x, work_y, work_z};
for (int i = 0; i < 3; ++i) {
info->last_block[i] = work[i] % info->block[i];
info->grid[i] = DIV_ROUND_UP(work[i], info->block[i]);
}
}
void si_compute_copy_image(struct si_context *sctx, struct pipe_resource *dst, unsigned dst_level,
struct pipe_resource *src, unsigned src_level, unsigned dstx,
unsigned dsty, unsigned dstz, const struct pipe_box *src_box,
@@ -489,6 +504,7 @@ void si_compute_copy_image(struct si_context *sctx, struct pipe_resource *dst, u
enum pipe_format src_format = util_format_linear(src->format);
enum pipe_format dst_format = util_format_linear(dst->format);
bool is_linear = ssrc->surface.is_linear || sdst->surface.is_linear;
bool is_1D = dst->target == PIPE_TEXTURE_1D_ARRAY && src->target == PIPE_TEXTURE_1D_ARRAY;
assert(util_format_is_subsampled_422(src_format) == util_format_is_subsampled_422(dst_format));
@@ -579,12 +595,6 @@ void si_compute_copy_image(struct si_context *sctx, struct pipe_resource *dst, u
ctx->set_shader_images(ctx, PIPE_SHADER_COMPUTE, 0, 2, 0, image);
if (!is_dcc_decompress) {
sctx->cs_user_data[0] = src_box->x | (dstx << 16);
sctx->cs_user_data[1] = src_box->y | (dsty << 16);
sctx->cs_user_data[2] = src_box->z | (dstz << 16);
}
struct pipe_grid_info info = {0};
if (is_dcc_decompress) {
@@ -593,64 +603,56 @@ void si_compute_copy_image(struct si_context *sctx, struct pipe_resource *dst, u
* the DCC block size or a multiple thereof. The shader uses a barrier
* between loads and stores to safely overwrite each DCC block of pixels.
*/
unsigned dim[3] = {src_box->width, src_box->height, src_box->depth};
assert(src == dst);
assert(dst->target != PIPE_TEXTURE_1D && dst->target != PIPE_TEXTURE_1D_ARRAY);
if (!sctx->cs_dcc_decompress)
sctx->cs_dcc_decompress = si_create_dcc_decompress_cs(ctx);
info.block[0] = ssrc->surface.u.gfx9.color.dcc_block_width;
info.block[1] = ssrc->surface.u.gfx9.color.dcc_block_height;
info.block[2] = ssrc->surface.u.gfx9.color.dcc_block_depth;
unsigned block_x = ssrc->surface.u.gfx9.color.dcc_block_width;
unsigned block_y = ssrc->surface.u.gfx9.color.dcc_block_height;
unsigned block_z = ssrc->surface.u.gfx9.color.dcc_block_depth;
unsigned default_wave_size = si_determine_wave_size(sctx->screen, NULL);
/* Make sure the block size is at least the same as wave size. */
while (info.block[0] * info.block[1] * info.block[2] < default_wave_size) {
info.block[0] *= 2;
while (block_x * block_y * block_z < default_wave_size) {
block_x *= 2;
}
for (unsigned i = 0; i < 3; i++) {
info.last_block[i] = dim[i] % info.block[i];
info.grid[i] = DIV_ROUND_UP(dim[i], info.block[i]);
}
set_work_size(&info, block_x, block_y, block_z, src_box->width, src_box->height, src_box->depth);
si_launch_grid_internal(sctx, &info, sctx->cs_dcc_decompress, flags | SI_OP_CS_IMAGE);
} else if (dst->target == PIPE_TEXTURE_1D_ARRAY && src->target == PIPE_TEXTURE_1D_ARRAY) {
if (!sctx->cs_copy_image_1d_array)
sctx->cs_copy_image_1d_array = si_create_copy_image_1d_array_cs(ctx);
info.block[0] = 64;
info.last_block[0] = width % 64;
info.block[1] = 1;
info.block[2] = 1;
info.grid[0] = DIV_ROUND_UP(width, 64);
info.grid[1] = depth;
info.grid[2] = 1;
si_launch_grid_internal(sctx, &info, sctx->cs_copy_image_1d_array, flags | SI_OP_CS_IMAGE);
} else {
if (!sctx->cs_copy_image)
sctx->cs_copy_image = si_create_copy_image_cs(ctx);
sctx->cs_user_data[0] = src_box->x | (dstx << 16);
/* This is better for access over PCIe. */
if (is_linear) {
info.block[0] = 64;
info.block[1] = 1;
int block_x = is_1D || is_linear ? 64 : 8;
int block_y = is_1D || is_linear ? 1 : 8;
int block_z = 1;
if (is_1D) {
assert(height == 1); /* height is not used for 1D images */
assert(src_box->y == 0 && dsty == 0);
sctx->cs_user_data[1] = src_box->z | (dstz << 16);
/* We pass array index in 'y' for 1D images. */
height = depth;
depth = 1;
} else {
info.block[0] = 8;
info.block[1] = 8;
sctx->cs_user_data[1] = src_box->y | (dsty << 16);
sctx->cs_user_data[2] = src_box->z | (dstz << 16);
}
info.last_block[0] = width % info.block[0];
info.last_block[1] = height % info.block[1];
info.block[2] = 1;
info.grid[0] = DIV_ROUND_UP(width, info.block[0]);
info.grid[1] = DIV_ROUND_UP(height, info.block[1]);
info.grid[2] = depth;
si_launch_grid_internal(sctx, &info, sctx->cs_copy_image, flags | SI_OP_CS_IMAGE);
set_work_size(&info, block_x, block_y, block_z, width, height, depth);
void **copy_image_cs_ptr = is_1D ? &sctx->cs_copy_image_1D : &sctx->cs_copy_image_2D;
if (!*copy_image_cs_ptr)
*copy_image_cs_ptr = si_create_copy_image_cs(sctx, is_1D);
assert(*copy_image_cs_ptr);
si_launch_grid_internal(sctx, &info, *copy_image_cs_ptr, flags | SI_OP_CS_IMAGE);
}
ctx->set_shader_images(ctx, PIPE_SHADER_COMPUTE, 0, 2, 0, saved_image);
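
For reference, a standalone illustration (not part of the commit) of the grid math that set_work_size() performs above; the 8x8x1 block and the 100x30x1 region are made-up example values, and DIV_ROUND_UP is redefined locally so the snippet compiles on its own.

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
   /* Example values only: the 8x8x1 block used for tiled 2D copies,
    * applied to a 100x30x1 copy region. */
   unsigned block[3] = {8, 8, 1};
   unsigned work[3] = {100, 30, 1};

   for (int i = 0; i < 3; i++) {
      unsigned grid = DIV_ROUND_UP(work[i], block[i]); /* number of workgroups    */
      unsigned last = work[i] % block[i];              /* size of the last group  */
      printf("dim %d: grid=%u last_block=%u\n", i, grid, last);
   }
   /* Prints grid=13/4/1 and last_block=4/6/0: the trailing workgroup in x and y
    * is trimmed to the partial 4x6 tile so the shader stays inside the region. */
   return 0;
}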

src/gallium/drivers/radeonsi/si_pipe.c

@@ -258,10 +258,10 @@ static void si_destroy_context(struct pipe_context *context)
sctx->b.delete_compute_state(&sctx->b, sctx->cs_clear_buffer_rmw);
if (sctx->cs_copy_buffer)
sctx->b.delete_compute_state(&sctx->b, sctx->cs_copy_buffer);
if (sctx->cs_copy_image)
sctx->b.delete_compute_state(&sctx->b, sctx->cs_copy_image);
if (sctx->cs_copy_image_1d_array)
sctx->b.delete_compute_state(&sctx->b, sctx->cs_copy_image_1d_array);
if (sctx->cs_copy_image_1D)
sctx->b.delete_compute_state(&sctx->b, sctx->cs_copy_image_1D);
if (sctx->cs_copy_image_2D)
sctx->b.delete_compute_state(&sctx->b, sctx->cs_copy_image_2D);
if (sctx->cs_clear_render_target)
sctx->b.delete_compute_state(&sctx->b, sctx->cs_clear_render_target);
if (sctx->cs_clear_render_target_1d_array)

src/gallium/drivers/radeonsi/si_pipe.h

@@ -970,8 +970,8 @@ struct si_context {
void *cs_clear_buffer;
void *cs_clear_buffer_rmw;
void *cs_copy_buffer;
void *cs_copy_image;
void *cs_copy_image_1d_array;
void *cs_copy_image_1D;
void *cs_copy_image_2D;
void *cs_clear_render_target;
void *cs_clear_render_target_1d_array;
void *cs_clear_12bytes_buffer;
@@ -1517,8 +1517,7 @@ void si_suspend_queries(struct si_context *sctx);
void si_resume_queries(struct si_context *sctx);
/* si_shaderlib_nir.c */
void *si_create_copy_image_cs(struct pipe_context *ctx);
void *si_create_copy_image_1d_array_cs(struct pipe_context *ctx);
void *si_create_copy_image_cs(struct si_context *sctx, bool is_1D);
void *si_create_dcc_retile_cs(struct si_context *sctx, struct radeon_surf *surf);
void *gfx9_create_clear_dcc_msaa_cs(struct si_context *sctx, struct si_texture *tex);

src/gallium/drivers/radeonsi/si_shaderlib_nir.c

@@ -53,32 +53,46 @@ static void unpack_2x16(nir_builder *b, nir_ssa_def *src, nir_ssa_def **x, nir_s
*y = nir_ushr(b, src, nir_imm_int(b, 16));
}
/* Create a NIR compute shader implementing copy_image for 1D_ARRAY images.
*/
void *si_create_copy_image_1d_array_cs(struct pipe_context *ctx)
static nir_ssa_def *
deref_ssa(nir_builder *b, nir_variable *var)
{
struct si_context *sctx = (struct si_context *) ctx;
return &nir_build_deref_var(b, var)->dest.ssa;
}
/* Create a NIR compute shader implementing copy_image.
*
* This shader can handle 1D and 2D, linear and non-linear images.
* It expects the source and destination (x,y,z) coords as user_data_amd,
* packed into 3 SGPRs as 2x16bits per component.
*/
void *si_create_copy_image_cs(struct si_context *sctx, bool is_1D)
{
const nir_shader_compiler_options *options =
sctx->b.screen->get_compiler_options(sctx->b.screen, PIPE_SHADER_IR_NIR, PIPE_SHADER_COMPUTE);
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options, "copy_image_1d_array_cs");
b.shader->info.workgroup_size[0] = 64;
b.shader->info.workgroup_size[1] = 1;
b.shader->info.workgroup_size[2] = 1;
b.shader->info.cs.user_data_components_amd = 3;
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options, "copy_image_cs");
b.shader->info.num_images = 2;
/* The workgroup size is either 8x8 for normal (non-linear) 2D images,
* or 64x1 for 1D and linear-2D images.
*/
b.shader->info.workgroup_size_variable = true;
/* 1D uses 'x' as image coord, and 'y' as array index.
* 2D uses 'x'&'y' as image coords, and 'z' as array index.
*/
int n_components = is_1D ? 2 : 3;
b.shader->info.cs.user_data_components_amd = n_components;
nir_ssa_def *ids = get_global_ids(&b, n_components);
nir_ssa_def *coord_src = NULL, *coord_dst = NULL;
unpack_2x16(&b, nir_load_user_data_amd(&b), &coord_src, &coord_dst);
nir_ssa_def *ids = get_global_ids(&b, 3);
coord_src = nir_channels(&b, nir_iadd(&b, coord_src, ids), /*xz*/ 0x5);
coord_dst = nir_channels(&b, nir_iadd(&b, coord_dst, ids), /*xz*/ 0x5);
coord_src = nir_iadd(&b, coord_src, ids);
coord_dst = nir_iadd(&b, coord_dst, ids);
const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_1D, /*is_array*/ true, GLSL_TYPE_FLOAT);
const struct glsl_type *img_type = glsl_image_type(is_1D ? GLSL_SAMPLER_DIM_1D : GLSL_SAMPLER_DIM_2D,
/*is_array*/ true, GLSL_TYPE_FLOAT);
nir_variable *img_src = nir_variable_create(b.shader, nir_var_image, img_type, "img_src");
img_src->data.binding = 0;
@@ -90,56 +104,9 @@ void *si_create_copy_image_1d_array_cs(struct pipe_context *ctx)
nir_ssa_def *zero = nir_imm_int(&b, 0);
nir_ssa_def *data = nir_image_deref_load(&b, /*num_components*/ 4, /*bit_size*/ 32,
&nir_build_deref_var(&b, img_src)->dest.ssa, coord_src, undef32, zero);
deref_ssa(&b, img_src), coord_src, undef32, zero);
nir_image_deref_store(&b,
&nir_build_deref_var(&b, img_dst)->dest.ssa, coord_dst, undef32, data, zero);
return create_nir_cs(sctx, &b);
}
/* Create a NIR compute shader implementing copy_image.
*
* This is the NIR version of the removed si_create_copy_image_compute_shader() [TGSI].
* It inherits the following note from the TGSI version:
* "Luckily, this works with all texture targets except 1D_ARRAY."
*/
void *si_create_copy_image_cs(struct pipe_context *ctx)
{
struct si_context *sctx = (struct si_context *) ctx;
const nir_shader_compiler_options *options =
sctx->b.screen->get_compiler_options(sctx->b.screen, PIPE_SHADER_IR_NIR, PIPE_SHADER_COMPUTE);
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options, "copy_image_cs");
b.shader->info.workgroup_size_variable = true;
b.shader->info.cs.user_data_components_amd = 3;
b.shader->info.num_images = 2;
nir_ssa_def *coord_src = NULL, *coord_dst = NULL;
unpack_2x16(&b, nir_load_user_data_amd(&b), &coord_src, &coord_dst);
nir_ssa_def *ids = get_global_ids(&b, 3);
coord_src = nir_iadd(&b, coord_src, ids);
coord_dst = nir_iadd(&b, coord_dst, ids);
const struct glsl_type *img_type =
glsl_image_type(GLSL_SAMPLER_DIM_2D, /*is_array*/ true, GLSL_TYPE_FLOAT);
nir_variable *img_src = nir_variable_create(b.shader, nir_var_image, img_type, "img_src");
img_src->data.binding = 0;
nir_variable *img_dst = nir_variable_create(b.shader, nir_var_image, img_type, "img_dst");
img_dst->data.binding = 1;
nir_ssa_def *data = nir_image_deref_load(&b, /*num_components*/ 4, /*bit_size*/ 32,
&nir_build_deref_var(&b, img_src)->dest.ssa, coord_src, nir_ssa_undef(&b, 1, 32),
nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D);
nir_image_deref_store(&b,
&nir_build_deref_var(&b, img_dst)->dest.ssa, coord_dst, nir_ssa_undef(&b, 1, 32), data,
nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D);
nir_image_deref_store(&b, deref_ssa(&b, img_dst), coord_dst, undef32, data, zero);
return create_nir_cs(sctx, &b);
}
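
As an aside (not part of the commit), the user_data_amd packing described in the comment above can be sketched in plain C; pack_src_dst()/unpack_src_dst() are hypothetical names used only for this illustration.

#include <assert.h>
#include <stdint.h>

/* Each 32-bit cs_user_data word carries the source coordinate in the low
 * 16 bits and the destination coordinate in the high 16 bits; the shader's
 * unpack_2x16() splits them apart again. */
static uint32_t pack_src_dst(uint16_t src, uint16_t dst)
{
   return (uint32_t)src | ((uint32_t)dst << 16);
}

static void unpack_src_dst(uint32_t packed, uint16_t *src, uint16_t *dst)
{
   *src = packed & 0xffff; /* low half: source coordinate       */
   *dst = packed >> 16;    /* high half: destination coordinate */
}

int main(void)
{
   uint16_t s, d;
   unpack_src_dst(pack_src_dst(35, 7), &s, &d);
   assert(s == 35 && d == 7);
   return 0;
}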