intel/blorp: Support some image/buffer blit operations using compute
Reworks:
 * Use BLORP_BATCH_USE_COMPUTE flag rather than compute param to blorp_copy (s-b Jason)
 * Squash "intel/blorp: Set shader_pipeline for compute"
 * Squash "intel/blorp: Add blorp_copy_supports_compute function"
 * Squash "intel: Support compute for image/buffer copy if INTEL_DEBUG=blocs is set"
 * Squash "intel/blorp: Support compute for some blit operations"
 * Use nir_image_store (s-b Jason)
 * Use nir_push_if (s-b Jason)
 * Require gfx12 for ccs in blorp_copy_supports_compute (s-b Jason)
 * Add nir_pop_if (s-b Ken)
 * Fix aux_usage check on gfx12 blorp_copy_supports_compute (s-b Ken)
 * Use blorp_set_cs_dims (s-b Jason)
 * Use dim=2d with array=true for nir_image_store (s-b Jason, Francisco)
 * Restructure gen checks in blorp_copy_supports_compute (s-b Ken)
 * Use nir_load_global_invocation_id (s-b Jason)
 * Fix inefficient calculation of store_pos (s-b Jason)
 * Use bounds_if being NULL/non-NULL for nir_pop_if (s-b Jason)
 * discard => bounds (s-b Ken)
 * Re-add ISL_AUX_USAGE_CCS_E in *_supports_compute (s-b Sagar)
 * Skip duplicated in_bounds calculation (s-b Jason)

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11564>
This commit is contained in:
parent
71c4937833
commit
06691131e9
|
@ -178,6 +178,14 @@ blorp_clear_supports_compute(struct blorp_context *blorp,
|
|||
uint8_t color_write_disable, bool blend_enabled,
|
||||
enum isl_aux_usage aux_usage);
|
||||
|
||||
/* Returns whether blorp_copy() may be run with BLORP_BATCH_USE_COMPUTE for a
 * destination surface that uses dst_aux_usage.  blorp_copy() asserts this
 * when the batch has the compute flag set.
 */
bool
|
||||
blorp_copy_supports_compute(struct blorp_context *blorp,
|
||||
enum isl_aux_usage dst_aux_usage);
|
||||
|
||||
/* Returns whether blorp_blit() may be run with BLORP_BATCH_USE_COMPUTE for a
 * destination surface that uses dst_aux_usage.  blorp_blit() asserts this
 * when the batch has the compute flag set.
 */
bool
|
||||
blorp_blit_supports_compute(struct blorp_context *blorp,
|
||||
enum isl_aux_usage dst_aux_usage);
|
||||
|
||||
void
|
||||
blorp_clear(struct blorp_batch *batch,
|
||||
const struct blorp_surf *surf,
|
||||
|
|
|
@ -25,6 +25,7 @@
|
|||
#include "compiler/nir/nir_format_convert.h"
|
||||
|
||||
#include "blorp_priv.h"
|
||||
#include "dev/intel_debug.h"
|
||||
|
||||
#include "util/format_rgb9e5.h"
|
||||
/* header-only include needed for _mesa_unorm_to_float and friends. */
|
||||
|
@ -90,6 +91,28 @@ blorp_blit_get_frag_coords(nir_builder *b,
|
|||
}
|
||||
}
|
||||
|
||||
static nir_ssa_def *
|
||||
blorp_blit_get_cs_dst_coords(nir_builder *b,
|
||||
const struct brw_blorp_blit_prog_key *key,
|
||||
struct brw_blorp_blit_vars *v)
|
||||
{
|
||||
nir_ssa_def *coord = nir_load_global_invocation_id(b, 32);
|
||||
|
||||
/* Account for destination surface intratile offset
|
||||
*
|
||||
* Transformation parameters giving translation from destination to source
|
||||
* coordinates don't take into account possible intra-tile destination
|
||||
* offset. Therefore it has to be first subtracted from the incoming
|
||||
* coordinates. Vertices are set up based on coordinates containing the
|
||||
* intra-tile offset.
|
||||
*/
|
||||
if (key->need_dst_offset)
|
||||
coord = nir_isub(b, coord, nir_load_var(b, v->v_dst_offset));
|
||||
|
||||
assert(!key->persample_msaa_dispatch);
|
||||
return nir_channels(b, coord, 0x3);
|
||||
}
|
||||
|
||||
/**
|
||||
* Emit code to translate from destination (X, Y) coordinates to source (X, Y)
|
||||
* coordinates.
|
||||
|
@ -1143,7 +1166,8 @@ convert_color(struct nir_builder *b, nir_ssa_def *color,
|
|||
* of samples).
|
||||
*/
|
||||
static nir_shader *
|
||||
brw_blorp_build_nir_shader(struct blorp_context *blorp, void *mem_ctx,
|
||||
brw_blorp_build_nir_shader(struct blorp_context *blorp,
|
||||
struct blorp_batch *batch, void *mem_ctx,
|
||||
const struct brw_blorp_blit_prog_key *key)
|
||||
{
|
||||
const struct intel_device_info *devinfo = blorp->isl_dev->info;
|
||||
|
@ -1178,12 +1202,18 @@ brw_blorp_build_nir_shader(struct blorp_context *blorp, void *mem_ctx,
|
|||
(key->dst_samples <= 1));
|
||||
|
||||
nir_builder b;
|
||||
blorp_nir_init_shader(&b, mem_ctx, MESA_SHADER_FRAGMENT, NULL);
|
||||
const bool compute =
|
||||
key->base.shader_pipeline == BLORP_SHADER_PIPELINE_COMPUTE;
|
||||
gl_shader_stage stage =
|
||||
compute ? MESA_SHADER_COMPUTE : MESA_SHADER_FRAGMENT;
|
||||
blorp_nir_init_shader(&b, mem_ctx, stage, NULL);
|
||||
|
||||
struct brw_blorp_blit_vars v;
|
||||
brw_blorp_blit_vars_init(&b, &v, key);
|
||||
|
||||
dst_pos = blorp_blit_get_frag_coords(&b, key, &v);
|
||||
dst_pos = compute ?
|
||||
blorp_blit_get_cs_dst_coords(&b, key, &v) :
|
||||
blorp_blit_get_frag_coords(&b, key, &v);
|
||||
|
||||
/* Render target and texture hardware don't support W tiling until Gfx8. */
|
||||
const bool rt_tiled_w = false;
|
||||
|
@ -1235,11 +1265,15 @@ brw_blorp_build_nir_shader(struct blorp_context *blorp, void *mem_ctx,
|
|||
* If we need to kill pixels that are outside the destination rectangle,
|
||||
* now is the time to do it.
|
||||
*/
|
||||
nir_if *bounds_if = NULL;
|
||||
if (key->use_kill) {
|
||||
nir_ssa_def *bounds_rect = nir_load_var(&b, v.v_bounds_rect);
|
||||
nir_ssa_def *in_bounds = blorp_check_in_bounds(&b, bounds_rect,
|
||||
dst_pos);
|
||||
nir_discard_if(&b, nir_inot(&b, in_bounds));
|
||||
if (!compute)
|
||||
nir_discard_if(&b, nir_inot(&b, in_bounds));
|
||||
else
|
||||
bounds_if = nir_push_if(&b, in_bounds);
|
||||
}
|
||||
|
||||
src_pos = blorp_blit_apply_transform(&b, nir_i2f32(&b, dst_pos), &v);
|
||||
|
@ -1433,7 +1467,17 @@ brw_blorp_build_nir_shader(struct blorp_context *blorp, void *mem_ctx,
|
|||
color = nir_vec4(&b, color_component, u, u, u);
|
||||
}
|
||||
|
||||
if (key->dst_usage == ISL_SURF_USAGE_RENDER_TARGET_BIT) {
|
||||
if (compute) {
|
||||
nir_ssa_def *store_pos = nir_load_global_invocation_id(&b, 32);
|
||||
nir_image_store(&b, nir_imm_int(&b, 0),
|
||||
nir_pad_vector_imm_int(&b, store_pos, 0, 4),
|
||||
nir_imm_int(&b, 0),
|
||||
nir_pad_vector_imm_int(&b, color, 0, 4),
|
||||
nir_imm_int(&b, 0),
|
||||
.image_dim = GLSL_SAMPLER_DIM_2D,
|
||||
.image_array = true,
|
||||
.access = ACCESS_NON_READABLE);
|
||||
} else if (key->dst_usage == ISL_SURF_USAGE_RENDER_TARGET_BIT) {
|
||||
nir_variable *color_out =
|
||||
nir_variable_create(b.shader, nir_var_shader_out,
|
||||
glsl_vec4_type(), "gl_FragColor");
|
||||
|
@ -1455,13 +1499,16 @@ brw_blorp_build_nir_shader(struct blorp_context *blorp, void *mem_ctx,
|
|||
unreachable("Invalid destination usage");
|
||||
}
|
||||
|
||||
if (bounds_if)
|
||||
nir_pop_if(&b, bounds_if);
|
||||
|
||||
return b.shader;
|
||||
}
|
||||
|
||||
static bool
|
||||
brw_blorp_get_blit_kernel(struct blorp_batch *batch,
|
||||
struct blorp_params *params,
|
||||
const struct brw_blorp_blit_prog_key *key)
|
||||
brw_blorp_get_blit_kernel_fs(struct blorp_batch *batch,
|
||||
struct blorp_params *params,
|
||||
const struct brw_blorp_blit_prog_key *key)
|
||||
{
|
||||
struct blorp_context *blorp = batch->blorp;
|
||||
|
||||
|
@ -1474,7 +1521,7 @@ brw_blorp_get_blit_kernel(struct blorp_batch *batch,
|
|||
const unsigned *program;
|
||||
struct brw_wm_prog_data prog_data;
|
||||
|
||||
nir_shader *nir = brw_blorp_build_nir_shader(blorp, mem_ctx, key);
|
||||
nir_shader *nir = brw_blorp_build_nir_shader(blorp, batch, mem_ctx, key);
|
||||
nir->info.name =
|
||||
ralloc_strdup(nir, blorp_shader_type_to_name(key->base.shader_type));
|
||||
|
||||
|
@ -1499,6 +1546,47 @@ brw_blorp_get_blit_kernel(struct blorp_batch *batch,
|
|||
return result;
|
||||
}
|
||||
|
||||
static bool
|
||||
brw_blorp_get_blit_kernel_cs(struct blorp_batch *batch,
|
||||
struct blorp_params *params,
|
||||
const struct brw_blorp_blit_prog_key *prog_key)
|
||||
{
|
||||
struct blorp_context *blorp = batch->blorp;
|
||||
|
||||
if (blorp->lookup_shader(batch, prog_key, sizeof(*prog_key),
|
||||
¶ms->cs_prog_kernel, ¶ms->cs_prog_data))
|
||||
return true;
|
||||
|
||||
void *mem_ctx = ralloc_context(NULL);
|
||||
|
||||
const unsigned *program;
|
||||
struct brw_cs_prog_data prog_data;
|
||||
|
||||
nir_shader *nir = brw_blorp_build_nir_shader(blorp, batch, mem_ctx,
|
||||
prog_key);
|
||||
nir->info.name = ralloc_strdup(nir, "BLORP-gpgpu-blit");
|
||||
blorp_set_cs_dims(nir, prog_key->local_y);
|
||||
|
||||
struct brw_cs_prog_key cs_key;
|
||||
brw_blorp_init_cs_prog_key(&cs_key);
|
||||
cs_key.base.tex.compressed_multisample_layout_mask =
|
||||
prog_key->tex_aux_usage == ISL_AUX_USAGE_MCS;
|
||||
cs_key.base.tex.msaa_16 = prog_key->tex_samples == 16;
|
||||
assert(prog_key->rt_samples == 1);
|
||||
|
||||
program = blorp_compile_cs(blorp, mem_ctx, nir, &cs_key, &prog_data);
|
||||
|
||||
bool result =
|
||||
blorp->upload_shader(batch, MESA_SHADER_COMPUTE,
|
||||
prog_key, sizeof(*prog_key),
|
||||
program, prog_data.base.program_size,
|
||||
&prog_data.base, sizeof(prog_data),
|
||||
¶ms->cs_prog_kernel, ¶ms->cs_prog_data);
|
||||
|
||||
ralloc_free(mem_ctx);
|
||||
return result;
|
||||
}
|
||||
|
||||
static void
|
||||
brw_blorp_setup_coord_transform(struct brw_blorp_coord_transform *xform,
|
||||
GLfloat src0, GLfloat src1,
|
||||
|
@ -2108,11 +2196,21 @@ try_blorp_blit(struct blorp_batch *batch,
|
|||
/* For some texture types, we need to pass the layer through the sampler. */
|
||||
params->wm_inputs.src_z = params->src.z_offset;
|
||||
|
||||
if (!brw_blorp_get_blit_kernel(batch, params, key))
|
||||
return 0;
|
||||
const bool compute =
|
||||
key->base.shader_pipeline == BLORP_SHADER_PIPELINE_COMPUTE;
|
||||
if (compute)
|
||||
key->local_y = blorp_get_cs_local_y(params);
|
||||
|
||||
if (!blorp_ensure_sf_program(batch, params))
|
||||
return 0;
|
||||
if (compute) {
|
||||
if (!brw_blorp_get_blit_kernel_cs(batch, params, key))
|
||||
return 0;
|
||||
} else {
|
||||
if (!brw_blorp_get_blit_kernel_fs(batch, params, key))
|
||||
return 0;
|
||||
|
||||
if (!blorp_ensure_sf_program(batch, params))
|
||||
return 0;
|
||||
}
|
||||
|
||||
unsigned result = 0;
|
||||
unsigned max_src_surface_size = get_max_surface_size(devinfo, &params->src);
|
||||
|
@ -2319,6 +2417,22 @@ do_blorp_blit(struct blorp_batch *batch,
|
|||
} while (true);
|
||||
}
|
||||
|
||||
bool
|
||||
blorp_blit_supports_compute(struct blorp_context *blorp,
|
||||
enum isl_aux_usage dst_aux_usage)
|
||||
{
|
||||
if (blorp->isl_dev->info->ver >= 12) {
|
||||
return dst_aux_usage == ISL_AUX_USAGE_GFX12_CCS_E ||
|
||||
dst_aux_usage == ISL_AUX_USAGE_CCS_E ||
|
||||
dst_aux_usage == ISL_AUX_USAGE_NONE;
|
||||
} else if (blorp->isl_dev->info->ver >= 7) {
|
||||
return dst_aux_usage == ISL_AUX_USAGE_NONE;
|
||||
} else {
|
||||
/* No compute shader support */
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
blorp_blit(struct blorp_batch *batch,
|
||||
const struct blorp_surf *src_surf,
|
||||
|
@ -2337,6 +2451,9 @@ blorp_blit(struct blorp_batch *batch,
|
|||
struct blorp_params params;
|
||||
blorp_params_init(&params);
|
||||
params.snapshot_type = INTEL_SNAPSHOT_BLIT;
|
||||
const bool compute = batch->flags & BLORP_BATCH_USE_COMPUTE;
|
||||
if (compute)
|
||||
assert(blorp_blit_supports_compute(batch->blorp, dst_surf->aux_usage));
|
||||
|
||||
/* We cannot handle combined depth and stencil. */
|
||||
if (src_surf->surf->usage & ISL_SURF_USAGE_STENCIL_BIT)
|
||||
|
@ -2366,6 +2483,8 @@ blorp_blit(struct blorp_batch *batch,
|
|||
|
||||
struct brw_blorp_blit_prog_key key = {
|
||||
.base = BRW_BLORP_BASE_KEY_INIT(BLORP_SHADER_TYPE_BLIT),
|
||||
.base.shader_pipeline = compute ? BLORP_SHADER_PIPELINE_COMPUTE :
|
||||
BLORP_SHADER_PIPELINE_RENDER,
|
||||
.filter = filter,
|
||||
.sint32_to_uint = src_fmtl->channels.r.bits == 32 &&
|
||||
isl_format_has_sint_channel(params.src.view.format) &&
|
||||
|
@ -2639,6 +2758,13 @@ blorp_surf_convert_to_uncompressed(const struct isl_device *isl_dev,
|
|||
info->surf.phys_level0_sa.h += info->tile_y_sa;
|
||||
}
|
||||
|
||||
bool
|
||||
blorp_copy_supports_compute(struct blorp_context *blorp,
|
||||
enum isl_aux_usage dst_aux_usage)
|
||||
{
|
||||
return blorp_blit_supports_compute(blorp, dst_aux_usage);
|
||||
}
|
||||
|
||||
void
|
||||
blorp_copy(struct blorp_batch *batch,
|
||||
const struct blorp_surf *src_surf,
|
||||
|
@ -2657,6 +2783,11 @@ blorp_copy(struct blorp_batch *batch,
|
|||
|
||||
blorp_params_init(&params);
|
||||
params.snapshot_type = INTEL_SNAPSHOT_COPY;
|
||||
|
||||
const bool compute = batch->flags & BLORP_BATCH_USE_COMPUTE;
|
||||
if (compute)
|
||||
assert(blorp_copy_supports_compute(batch->blorp, dst_surf->aux_usage));
|
||||
|
||||
brw_blorp_surface_info_init(batch, &params.src, src_surf, src_level,
|
||||
src_layer, ISL_FORMAT_UNSUPPORTED, false);
|
||||
brw_blorp_surface_info_init(batch, &params.dst, dst_surf, dst_level,
|
||||
|
@ -2664,6 +2795,8 @@ blorp_copy(struct blorp_batch *batch,
|
|||
|
||||
struct brw_blorp_blit_prog_key key = {
|
||||
.base = BRW_BLORP_BASE_KEY_INIT(BLORP_SHADER_TYPE_COPY),
|
||||
.base.shader_pipeline = compute ? BLORP_SHADER_PIPELINE_COMPUTE :
|
||||
BLORP_SHADER_PIPELINE_RENDER,
|
||||
.filter = BLORP_FILTER_NONE,
|
||||
.need_src_offset = src_surf->tile_x_sa || src_surf->tile_y_sa,
|
||||
.need_dst_offset = dst_surf->tile_x_sa || dst_surf->tile_y_sa,
|
||||
|
|
|
@ -390,6 +390,10 @@ struct brw_blorp_blit_prog_key
|
|||
*/
|
||||
float x_scale;
|
||||
float y_scale;
|
||||
|
||||
/* If a compute shader is used, this is the local size y dimension.
|
||||
*/
|
||||
uint8_t local_y;
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
Loading…
Reference in New Issue