intel/blorp: Support some image/buffer blit operations using compute

Reworks:
 * Use BLORP_BATCH_USE_COMPUTE flag rather than compute param to
   blorp_copy (s-b Jason)
 * Squash "intel/blorp: Set shader_pipeline for compute"
 * Squash "intel/blorp: Add blorp_copy_supports_compute function"
 * Squash "intel: Support compute for image/buffer copy if INTEL_DEBUG=blocs
           is set"
 * Squash "intel/blorp: Support compute for some blit operations"
 * Use nir_image_store (s-b Jason)
 * Use nir_push_if (s-b Jason)
 * Require gfx12 for ccs in blorp_copy_supports_compute (s-b Jason)
 * Add nir_pop_if (s-b Ken)
 * Fix aux_usage check on gfx12 blorp_copy_supports_compute (s-b Ken)
 * Use blorp_set_cs_dims (s-b Jason)
 * Use dim=2d with array=true for nir_image_store (s-b Jason, Francisco)
 * Restructure gen checks in blorp_copy_supports_compute (s-b Ken)
 * Use nir_load_global_invocation_id (s-b Jason)
 * Fix inefficient calculation of store_pos (s-b Jason)
 * Use bounds_if being NULL/non-NULL for nir_pop_if (s-b Jason)
 * discard => bounds (s-b Ken)
 * Re-add ISL_AUX_USAGE_CCS_E in *_supports_compute (s-b Sagar)
 * Skip duplicated in_bounds calculation (s-b Jason)

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11564>
This commit is contained in:
Jordan Justen 2018-10-26 13:06:03 -07:00 committed by Marge Bot
parent 71c4937833
commit 06691131e9
3 changed files with 158 additions and 13 deletions

View File

@@ -178,6 +178,14 @@ blorp_clear_supports_compute(struct blorp_context *blorp,
uint8_t color_write_disable, bool blend_enabled,
enum isl_aux_usage aux_usage);
bool
blorp_copy_supports_compute(struct blorp_context *blorp,
enum isl_aux_usage dst_aux_usage);
bool
blorp_blit_supports_compute(struct blorp_context *blorp,
enum isl_aux_usage dst_aux_usage);
void
blorp_clear(struct blorp_batch *batch,
const struct blorp_surf *surf,

View File

@@ -25,6 +25,7 @@
#include "compiler/nir/nir_format_convert.h"
#include "blorp_priv.h"
#include "dev/intel_debug.h"
#include "util/format_rgb9e5.h"
/* header-only include needed for _mesa_unorm_to_float and friends. */
@@ -90,6 +91,28 @@ blorp_blit_get_frag_coords(nir_builder *b,
}
}
/* Return the destination (X, Y) pixel coordinate for a compute-shader blit.
 *
 * The compute path derives the destination position directly from the
 * global invocation ID instead of gl_FragCoord.  Only the first two
 * channels (x, y) of the 3-component invocation ID are returned.
 */
static nir_ssa_def *
blorp_blit_get_cs_dst_coords(nir_builder *b,
const struct brw_blorp_blit_prog_key *key,
struct brw_blorp_blit_vars *v)
{
nir_ssa_def *coord = nir_load_global_invocation_id(b, 32);
/* Account for destination surface intratile offset
 *
 * Transformation parameters giving translation from destination to source
 * coordinates don't take into account possible intra-tile destination
 * offset. Therefore it has to be first subtracted from the incoming
 * coordinates. Vertices are set up based on coordinates containing the
 * intra-tile offset.
 */
if (key->need_dst_offset)
coord = nir_isub(b, coord, nir_load_var(b, v->v_dst_offset));
/* Per-sample MSAA dispatch is a fragment-pipeline-only feature; the
 * compute path never uses it.
 */
assert(!key->persample_msaa_dispatch);
/* Mask 0x3 keeps channels 0 and 1 (x, y). */
return nir_channels(b, coord, 0x3);
}
/**
* Emit code to translate from destination (X, Y) coordinates to source (X, Y)
* coordinates.
@@ -1143,7 +1166,8 @@ convert_color(struct nir_builder *b, nir_ssa_def *color,
* of samples).
*/
static nir_shader *
brw_blorp_build_nir_shader(struct blorp_context *blorp, void *mem_ctx,
brw_blorp_build_nir_shader(struct blorp_context *blorp,
struct blorp_batch *batch, void *mem_ctx,
const struct brw_blorp_blit_prog_key *key)
{
const struct intel_device_info *devinfo = blorp->isl_dev->info;
@@ -1178,12 +1202,18 @@ brw_blorp_build_nir_shader(struct blorp_context *blorp, void *mem_ctx,
(key->dst_samples <= 1));
nir_builder b;
blorp_nir_init_shader(&b, mem_ctx, MESA_SHADER_FRAGMENT, NULL);
const bool compute =
key->base.shader_pipeline == BLORP_SHADER_PIPELINE_COMPUTE;
gl_shader_stage stage =
compute ? MESA_SHADER_COMPUTE : MESA_SHADER_FRAGMENT;
blorp_nir_init_shader(&b, mem_ctx, stage, NULL);
struct brw_blorp_blit_vars v;
brw_blorp_blit_vars_init(&b, &v, key);
dst_pos = blorp_blit_get_frag_coords(&b, key, &v);
dst_pos = compute ?
blorp_blit_get_cs_dst_coords(&b, key, &v) :
blorp_blit_get_frag_coords(&b, key, &v);
/* Render target and texture hardware don't support W tiling until Gfx8. */
const bool rt_tiled_w = false;
@@ -1235,11 +1265,15 @@ brw_blorp_build_nir_shader(struct blorp_context *blorp, void *mem_ctx,
* If we need to kill pixels that are outside the destination rectangle,
* now is the time to do it.
*/
nir_if *bounds_if = NULL;
if (key->use_kill) {
nir_ssa_def *bounds_rect = nir_load_var(&b, v.v_bounds_rect);
nir_ssa_def *in_bounds = blorp_check_in_bounds(&b, bounds_rect,
dst_pos);
nir_discard_if(&b, nir_inot(&b, in_bounds));
if (!compute)
nir_discard_if(&b, nir_inot(&b, in_bounds));
else
bounds_if = nir_push_if(&b, in_bounds);
}
src_pos = blorp_blit_apply_transform(&b, nir_i2f32(&b, dst_pos), &v);
@@ -1433,7 +1467,17 @@ brw_blorp_build_nir_shader(struct blorp_context *blorp, void *mem_ctx,
color = nir_vec4(&b, color_component, u, u, u);
}
if (key->dst_usage == ISL_SURF_USAGE_RENDER_TARGET_BIT) {
if (compute) {
nir_ssa_def *store_pos = nir_load_global_invocation_id(&b, 32);
nir_image_store(&b, nir_imm_int(&b, 0),
nir_pad_vector_imm_int(&b, store_pos, 0, 4),
nir_imm_int(&b, 0),
nir_pad_vector_imm_int(&b, color, 0, 4),
nir_imm_int(&b, 0),
.image_dim = GLSL_SAMPLER_DIM_2D,
.image_array = true,
.access = ACCESS_NON_READABLE);
} else if (key->dst_usage == ISL_SURF_USAGE_RENDER_TARGET_BIT) {
nir_variable *color_out =
nir_variable_create(b.shader, nir_var_shader_out,
glsl_vec4_type(), "gl_FragColor");
@@ -1455,13 +1499,16 @@ brw_blorp_build_nir_shader(struct blorp_context *blorp, void *mem_ctx,
unreachable("Invalid destination usage");
}
if (bounds_if)
nir_pop_if(&b, bounds_if);
return b.shader;
}
static bool
brw_blorp_get_blit_kernel(struct blorp_batch *batch,
struct blorp_params *params,
const struct brw_blorp_blit_prog_key *key)
brw_blorp_get_blit_kernel_fs(struct blorp_batch *batch,
struct blorp_params *params,
const struct brw_blorp_blit_prog_key *key)
{
struct blorp_context *blorp = batch->blorp;
@@ -1474,7 +1521,7 @@ brw_blorp_get_blit_kernel(struct blorp_batch *batch,
const unsigned *program;
struct brw_wm_prog_data prog_data;
nir_shader *nir = brw_blorp_build_nir_shader(blorp, mem_ctx, key);
nir_shader *nir = brw_blorp_build_nir_shader(blorp, batch, mem_ctx, key);
nir->info.name =
ralloc_strdup(nir, blorp_shader_type_to_name(key->base.shader_type));
@@ -1499,6 +1546,47 @@ brw_blorp_get_blit_kernel(struct blorp_batch *batch,
return result;
}
/* Look up or build the compute-shader variant of the blit kernel.
 *
 * Mirrors brw_blorp_get_blit_kernel_fs but compiles a compute shader and
 * stores the result in params->cs_prog_kernel / params->cs_prog_data.
 * Returns true on success (cache hit or successful compile + upload).
 */
static bool
brw_blorp_get_blit_kernel_cs(struct blorp_batch *batch,
struct blorp_params *params,
const struct brw_blorp_blit_prog_key *prog_key)
{
struct blorp_context *blorp = batch->blorp;
/* Fast path: reuse a previously compiled kernel from the shader cache. */
if (blorp->lookup_shader(batch, prog_key, sizeof(*prog_key),
&params->cs_prog_kernel, &params->cs_prog_data))
return true;
void *mem_ctx = ralloc_context(NULL);
const unsigned *program;
struct brw_cs_prog_data prog_data;
nir_shader *nir = brw_blorp_build_nir_shader(blorp, batch, mem_ctx,
prog_key);
nir->info.name = ralloc_strdup(nir, "BLORP-gpgpu-blit");
/* Set the workgroup dimensions from the key's local_y choice. */
blorp_set_cs_dims(nir, prog_key->local_y);
struct brw_cs_prog_key cs_key;
brw_blorp_init_cs_prog_key(&cs_key);
/* Propagate the texture-sampling state the compiler needs from the
 * blit key into the CS compile key.
 */
cs_key.base.tex.compressed_multisample_layout_mask =
prog_key->tex_aux_usage == ISL_AUX_USAGE_MCS;
cs_key.base.tex.msaa_16 = prog_key->tex_samples == 16;
/* The compute path only supports single-sampled destinations. */
assert(prog_key->rt_samples == 1);
program = blorp_compile_cs(blorp, mem_ctx, nir, &cs_key, &prog_data);
bool result =
blorp->upload_shader(batch, MESA_SHADER_COMPUTE,
prog_key, sizeof(*prog_key),
program, prog_data.base.program_size,
&prog_data.base, sizeof(prog_data),
&params->cs_prog_kernel, &params->cs_prog_data);
/* The compiled program has been copied by upload_shader; the scratch
 * allocations are no longer needed.
 */
ralloc_free(mem_ctx);
return result;
}
static void
brw_blorp_setup_coord_transform(struct brw_blorp_coord_transform *xform,
GLfloat src0, GLfloat src1,
@@ -2108,11 +2196,21 @@ try_blorp_blit(struct blorp_batch *batch,
/* For some texture types, we need to pass the layer through the sampler. */
params->wm_inputs.src_z = params->src.z_offset;
if (!brw_blorp_get_blit_kernel(batch, params, key))
return 0;
const bool compute =
key->base.shader_pipeline == BLORP_SHADER_PIPELINE_COMPUTE;
if (compute)
key->local_y = blorp_get_cs_local_y(params);
if (!blorp_ensure_sf_program(batch, params))
return 0;
if (compute) {
if (!brw_blorp_get_blit_kernel_cs(batch, params, key))
return 0;
} else {
if (!brw_blorp_get_blit_kernel_fs(batch, params, key))
return 0;
if (!blorp_ensure_sf_program(batch, params))
return 0;
}
unsigned result = 0;
unsigned max_src_surface_size = get_max_surface_size(devinfo, &params->src);
@@ -2319,6 +2417,22 @@ do_blorp_blit(struct blorp_batch *batch,
} while (true);
}
bool
blorp_blit_supports_compute(struct blorp_context *blorp,
enum isl_aux_usage dst_aux_usage)
{
if (blorp->isl_dev->info->ver >= 12) {
return dst_aux_usage == ISL_AUX_USAGE_GFX12_CCS_E ||
dst_aux_usage == ISL_AUX_USAGE_CCS_E ||
dst_aux_usage == ISL_AUX_USAGE_NONE;
} else if (blorp->isl_dev->info->ver >= 7) {
return dst_aux_usage == ISL_AUX_USAGE_NONE;
} else {
/* No compute shader support */
return false;
}
}
void
blorp_blit(struct blorp_batch *batch,
const struct blorp_surf *src_surf,
@@ -2337,6 +2451,9 @@ blorp_blit(struct blorp_batch *batch,
struct blorp_params params;
blorp_params_init(&params);
params.snapshot_type = INTEL_SNAPSHOT_BLIT;
const bool compute = batch->flags & BLORP_BATCH_USE_COMPUTE;
if (compute)
assert(blorp_blit_supports_compute(batch->blorp, dst_surf->aux_usage));
/* We cannot handle combined depth and stencil. */
if (src_surf->surf->usage & ISL_SURF_USAGE_STENCIL_BIT)
@@ -2366,6 +2483,8 @@ blorp_blit(struct blorp_batch *batch,
struct brw_blorp_blit_prog_key key = {
.base = BRW_BLORP_BASE_KEY_INIT(BLORP_SHADER_TYPE_BLIT),
.base.shader_pipeline = compute ? BLORP_SHADER_PIPELINE_COMPUTE :
BLORP_SHADER_PIPELINE_RENDER,
.filter = filter,
.sint32_to_uint = src_fmtl->channels.r.bits == 32 &&
isl_format_has_sint_channel(params.src.view.format) &&
@@ -2639,6 +2758,13 @@ blorp_surf_convert_to_uncompressed(const struct isl_device *isl_dev,
info->surf.phys_level0_sa.h += info->tile_y_sa;
}
/* Report whether a copy with the given destination aux usage can be
 * performed on the compute pipeline.  Copies currently have exactly the
 * same constraints as blits, so defer to blorp_blit_supports_compute.
 */
bool
blorp_copy_supports_compute(struct blorp_context *blorp,
enum isl_aux_usage dst_aux_usage)
{
return blorp_blit_supports_compute(blorp, dst_aux_usage);
}
void
blorp_copy(struct blorp_batch *batch,
const struct blorp_surf *src_surf,
@@ -2657,6 +2783,11 @@ blorp_copy(struct blorp_batch *batch,
blorp_params_init(&params);
params.snapshot_type = INTEL_SNAPSHOT_COPY;
const bool compute = batch->flags & BLORP_BATCH_USE_COMPUTE;
if (compute)
assert(blorp_copy_supports_compute(batch->blorp, dst_surf->aux_usage));
brw_blorp_surface_info_init(batch, &params.src, src_surf, src_level,
src_layer, ISL_FORMAT_UNSUPPORTED, false);
brw_blorp_surface_info_init(batch, &params.dst, dst_surf, dst_level,
@@ -2664,6 +2795,8 @@
struct brw_blorp_blit_prog_key key = {
.base = BRW_BLORP_BASE_KEY_INIT(BLORP_SHADER_TYPE_COPY),
.base.shader_pipeline = compute ? BLORP_SHADER_PIPELINE_COMPUTE :
BLORP_SHADER_PIPELINE_RENDER,
.filter = BLORP_FILTER_NONE,
.need_src_offset = src_surf->tile_x_sa || src_surf->tile_y_sa,
.need_dst_offset = dst_surf->tile_x_sa || dst_surf->tile_y_sa,

View File

@@ -390,6 +390,10 @@ struct brw_blorp_blit_prog_key
*/
float x_scale;
float y_scale;
/* If a compute shader is used, this is the local size y dimension.
*/
uint8_t local_y;
};
/**