panvk: Lower blend operations when needed

The gallium driver makes use of blend shaders, but panvk takes a
slightly different approach. Vulkan drivers are passed the blend
operation at pipeline creation time, which means they know it when
compiling the fragment shader and can lower the blend operation
directly in the fragment shader itself. Doing that simplifies the
pipeline creation since we don't have to deal with blend shaders
anymore.

This might come at a cost for translation layers like Zink though,
since it requires re-compiling the fragment shader every time the
blend operation changes, which we do anyway, since we don't have
a pipeline cache yet. Let's keep things simple for now and revise
things if/when we end up having performance issues.

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Suggested-by: Alyssa Rosenzweig <alyssa@collabora.com>
Reviewed-by: Tomeu Vizoso <tomeu.vizoso@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13060>
This commit is contained in:
Boris Brezillon 2021-09-06 12:12:15 +02:00
parent 9db47ac96e
commit 4f484e6d9b
1 changed files with 170 additions and 13 deletions

View File

@ -30,11 +30,13 @@
#include "nir_builder.h"
#include "nir_lower_blend.h"
#include "nir_conversion_builder.h"
#include "spirv/nir_spirv.h"
#include "util/mesa-sha1.h"
#include "panfrost-quirks.h"
#include "pan_shader.h"
#include "util/pan_lower_framebuffer.h"
#include "vk_util.h"
@ -216,9 +218,93 @@ panvk_lower_misc(nir_shader *nir, const struct panvk_lower_misc_ctx *ctx)
(void *)ctx);
}
static bool
panvk_inline_blend_constants(nir_builder *b, nir_instr *instr, void *data)
{
if (instr->type != nir_instr_type_intrinsic)
return false;
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
if (intr->intrinsic != nir_intrinsic_load_blend_const_color_rgba)
return false;
const nir_const_value *constants = data;
b->cursor = nir_after_instr(instr);
nir_ssa_def *constant = nir_build_imm(b, 4, 32, constants);
nir_ssa_def_rewrite_uses(&intr->dest.ssa, constant);
nir_instr_remove(instr);
return true;
}
#if PAN_ARCH <= 5
struct panvk_lower_blend_type_conv {
nir_variable *var;
nir_alu_type newtype;
nir_alu_type oldtype;
};
static bool
panvk_adjust_rt_type(nir_builder *b, nir_instr *instr, void *data)
{
if (instr->type != nir_instr_type_intrinsic)
return false;
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
if (intr->intrinsic != nir_intrinsic_store_deref &&
intr->intrinsic != nir_intrinsic_load_deref)
return false;
nir_variable *var = nir_intrinsic_get_var(intr, 0);
if (var->data.mode != nir_var_shader_out ||
(var->data.location != FRAG_RESULT_COLOR &&
var->data.location < FRAG_RESULT_DATA0))
return false;
/* Determine render target for per-RT blending */
unsigned rt =
(var->data.location == FRAG_RESULT_COLOR) ? 0 :
(var->data.location - FRAG_RESULT_DATA0);
const struct panvk_lower_blend_type_conv *typeconv = data;
nir_alu_type newtype = typeconv[rt].newtype;
nir_alu_type oldtype = typeconv[rt].oldtype;
/* No conversion */
if (newtype == nir_type_invalid || newtype == oldtype)
return false;
b->cursor = nir_before_instr(instr);
nir_deref_instr *deref = nir_build_deref_var(b, typeconv[rt].var);
nir_instr_rewrite_src(&intr->instr, &intr->src[0],
nir_src_for_ssa(&deref->dest.ssa));
if (intr->intrinsic == nir_intrinsic_store_deref) {
nir_ssa_def *val = nir_ssa_for_src(b, intr->src[1], 4);
bool clamp = nir_alu_type_get_base_type(newtype) != nir_type_float;
val = nir_convert_with_rounding(b, val, oldtype, newtype,
nir_rounding_mode_undef, clamp);
nir_store_var(b, typeconv[rt].var, val, nir_intrinsic_write_mask(intr));
} else {
bool clamp = nir_alu_type_get_base_type(oldtype) != nir_type_float;
nir_ssa_def *val = nir_load_var(b, typeconv[rt].var);
val = nir_convert_with_rounding(b, val, newtype, oldtype,
nir_rounding_mode_undef, clamp);
nir_ssa_def_rewrite_uses(&intr->dest.ssa, val);
}
nir_instr_remove(instr);
return true;
}
#endif
static void
panvk_lower_blend(struct panfrost_device *pdev,
nir_shader *nir,
struct panfrost_compile_inputs *inputs,
struct pan_blend_state *blend_state,
bool static_blend_constants)
{
@ -227,14 +313,22 @@ panvk_lower_blend(struct panfrost_device *pdev,
.logicop_func = blend_state->logicop_func,
};
#if PAN_ARCH <= 5
struct panvk_lower_blend_type_conv typeconv[8] = { 0 };
#endif
bool lower_blend = false;
for (unsigned rt = 0; rt < blend_state->rt_count; rt++) {
struct pan_blend_rt_state *rt_state = &blend_state->rts[rt];
if (!panvk_per_arch(blend_needs_lowering)(pdev, blend_state, rt))
continue;
const struct pan_blend_rt_state *rt_state = &blend_state->rts[rt];
enum pipe_format fmt = rt_state->format;
options.format[rt] = fmt;
options.rt[rt].colormask = rt_state->equation.color_mask;
options.format[rt] = rt_state->format;
if (!rt_state->equation.blend_enable) {
static const nir_lower_blend_channel replace = {
.func = BLEND_FUNC_ADD,
@ -259,14 +353,77 @@ panvk_lower_blend(struct panfrost_device *pdev,
options.rt[rt].alpha.invert_dst_factor = rt_state->equation.alpha_invert_dst_factor;
}
/* Update the equation to force a color replacement */
rt_state->equation.color_mask = 0xf;
rt_state->equation.rgb_func = BLEND_FUNC_ADD;
rt_state->equation.rgb_src_factor = BLEND_FACTOR_ZERO;
rt_state->equation.rgb_invert_src_factor = true;
rt_state->equation.rgb_dst_factor = BLEND_FACTOR_ZERO;
rt_state->equation.rgb_invert_dst_factor = false;
rt_state->equation.alpha_func = BLEND_FUNC_ADD;
rt_state->equation.alpha_src_factor = BLEND_FACTOR_ZERO;
rt_state->equation.alpha_invert_src_factor = true;
rt_state->equation.alpha_dst_factor = BLEND_FACTOR_ZERO;
rt_state->equation.alpha_invert_dst_factor = false;
lower_blend = true;
#if PAN_ARCH >= 6
inputs->bifrost.static_rt_conv = true;
inputs->bifrost.rt_conv[rt] =
GENX(pan_blend_get_internal_desc)(pdev, fmt, rt, 32, false) >> 32;
#else
if (!panfrost_blendable_formats_v6[fmt].internal) {
nir_variable *outvar =
nir_find_variable_with_location(nir, nir_var_shader_out, FRAG_RESULT_DATA0 + rt);
if (!outvar && !rt)
outvar = nir_find_variable_with_location(nir, nir_var_shader_out, FRAG_RESULT_COLOR);
assert(outvar);
const struct util_format_description *format_desc =
util_format_description(fmt);
typeconv[rt].newtype = pan_unpacked_type_for_format(format_desc);
typeconv[rt].oldtype = nir_get_nir_type_for_glsl_type(outvar->type);
typeconv[rt].var =
nir_variable_create(nir, nir_var_shader_out,
glsl_vector_type(nir_get_glsl_base_type_for_nir_type(typeconv[rt].newtype),
glsl_get_vector_elements(outvar->type)),
outvar->name);
typeconv[rt].var->data.location = outvar->data.location;
inputs->blend.nr_samples = rt_state->nr_samples;
inputs->rt_formats[rt] = rt_state->format;
}
#endif
}
/* FIXME: currently untested */
assert(!lower_blend);
if (lower_blend) {
#if PAN_ARCH <= 5
NIR_PASS_V(nir, nir_shader_instructions_pass,
panvk_adjust_rt_type,
nir_metadata_block_index |
nir_metadata_dominance,
&typeconv);
nir_remove_dead_derefs(nir);
nir_remove_dead_variables(nir, nir_var_shader_out, NULL);
#endif
if (lower_blend)
NIR_PASS_V(nir, nir_lower_blend, options);
if (static_blend_constants) {
const nir_const_value constants[4] = {
{ .f32 = CLAMP(blend_state->constants[0], 0.0f, 1.0f) },
{ .f32 = CLAMP(blend_state->constants[1], 0.0f, 1.0f) },
{ .f32 = CLAMP(blend_state->constants[2], 0.0f, 1.0f) },
{ .f32 = CLAMP(blend_state->constants[3], 0.0f, 1.0f) },
};
NIR_PASS_V(nir, nir_shader_instructions_pass,
panvk_inline_blend_constants,
nir_metadata_block_index |
nir_metadata_dominance,
(void *)constants);
}
}
}
struct panvk_shader *
@ -302,8 +459,11 @@ panvk_per_arch(shader_create)(struct panvk_device *dev,
return NULL;
}
if (stage == MESA_SHADER_FRAGMENT)
panvk_lower_blend(pdev, nir, blend_state, static_blend_constants);
struct panfrost_compile_inputs inputs = {
.gpu_id = pdev->gpu_id,
.no_ubo_to_push = true,
.sysval_ubo = sysval_ubo,
};
/* multi step inlining procedure */
NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
@ -339,6 +499,9 @@ panvk_per_arch(shader_create)(struct panvk_device *dev,
NIR_PASS_V(nir, nir_opt_copy_prop_vars);
NIR_PASS_V(nir, nir_opt_combine_stores, nir_var_all);
if (stage == MESA_SHADER_FRAGMENT)
panvk_lower_blend(pdev, nir, &inputs, blend_state, static_blend_constants);
NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, true, false);
NIR_PASS_V(nir, nir_lower_explicit_io,
nir_var_mem_ubo | nir_var_mem_ssbo,
@ -365,12 +528,6 @@ panvk_per_arch(shader_create)(struct panvk_device *dev,
nir_print_shader(nir, stderr);
}
struct panfrost_compile_inputs inputs = {
.gpu_id = pdev->gpu_id,
.no_ubo_to_push = true,
.sysval_ubo = sysval_ubo,
};
GENX(pan_shader_compile)(nir, &inputs, &shader->binary, &shader->info);
/* Patch the descriptor count */