radv: rework RADV_FORCE_VRS to make it more dynamic

The VRS rates are now emitted from the command buffer via a user SGPR,
which will allow changing the rates dynamically in later changes.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14713>
This commit is contained in:
Samuel Pitoiset 2022-01-25 09:24:38 +01:00
parent cbd5724a6d
commit 99d7c13051
4 changed files with 44 additions and 29 deletions

View File

@ -3628,6 +3628,40 @@ radv_flush_ngg_gs_state(struct radv_cmd_buffer *cmd_buffer)
radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, ngg_gs_state);
}
/**
 * Emit the forced VRS rates of the currently bound graphics pipeline.
 *
 * The rates are selected from the device-wide force_vrs setting and written
 * to the AC_UD_FORCE_VRS_RATES user SGPR of the pipeline's last VGT API
 * stage. Nothing is emitted when the pipeline doesn't force per-vertex VRS.
 *
 * Encoding of the emitted value:
 *   bits [2:3] = VRS rate X
 *   bits [4:5] = VRS rate Y
 * where 1 selects a 2x coarser shading rate in that direction and 0 keeps
 * the normal shading rate.
 */
static void
radv_flush_force_vrs_state(struct radv_cmd_buffer *cmd_buffer)
{
   struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;

   /* Only pipelines that force per-vertex VRS consume this user SGPR. */
   if (!pipeline->graphics.force_vrs_per_vertex)
      return;

   const unsigned stage = pipeline->graphics.last_vgt_api_stage;
   struct radv_userdata_info *loc =
      radv_lookup_user_sgpr(pipeline, stage, AC_UD_FORCE_VRS_RATES);
   assert(loc->sgpr_idx != -1);

   /* Per-axis rates; any other force_vrs value leaves both at 0. */
   uint32_t rate_x = 0;
   uint32_t rate_y = 0;

   switch (cmd_buffer->device->force_vrs) {
   case RADV_FORCE_VRS_2x2:
      rate_x = 1;
      rate_y = 1;
      break;
   case RADV_FORCE_VRS_2x1:
      rate_x = 1;
      break;
   case RADV_FORCE_VRS_1x2:
      rate_y = 1;
      break;
   default:
      break;
   }

   /* Each user SGPR occupies 4 bytes past the stage's first user-data register. */
   const uint32_t base_reg = pipeline->user_data_0[stage];
   const uint32_t packed_rates = (rate_x << 2) | (rate_y << 4);

   radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, packed_rates);
}
static void
radv_upload_graphics_shader_descriptors(struct radv_cmd_buffer *cmd_buffer, bool pipeline_is_dirty)
{
@ -3640,6 +3674,7 @@ radv_upload_graphics_shader_descriptors(struct radv_cmd_buffer *cmd_buffer, bool
radv_flush_constants(cmd_buffer, stages, cmd_buffer->state.pipeline,
VK_PIPELINE_BIND_POINT_GRAPHICS);
radv_flush_ngg_gs_state(cmd_buffer);
radv_flush_force_vrs_state(cmd_buffer);
}
struct radv_draw_info {

View File

@ -5783,7 +5783,7 @@ gfx103_pipeline_generate_vrs_state(struct radeon_cmdbuf *ctx_cs,
mode = V_028064_VRS_COMB_MODE_OVERRIDE;
rate_x = rate_y = 1;
} else if (!vk_find_struct_const(pCreateInfo->pNext, PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR) &&
pipeline->device->force_vrs != RADV_FORCE_VRS_NONE &&
pipeline->graphics.force_vrs_per_vertex &&
get_vs_output_info(pipeline)->writes_primitive_shading_rate) {
/* Otherwise, if per-draw VRS is not enabled statically, try forcing per-vertex VRS if
* requested by the user. Note that vkd3d-proton always has to declare VRS as dynamic because
@ -6077,6 +6077,8 @@ radv_pipeline_init(struct radv_pipeline *pipeline, struct radv_device *device,
pipeline->graphics.has_ngg_culling =
pipeline->graphics.is_ngg &&
pipeline->shaders[pipeline->graphics.last_vgt_api_stage]->info.has_ngg_culling;
pipeline->graphics.force_vrs_per_vertex =
pipeline->shaders[pipeline->graphics.last_vgt_api_stage]->info.force_vrs_per_vertex;
pipeline->push_constant_size = pipeline_layout->push_constant_size;
pipeline->dynamic_offset_count = pipeline_layout->dynamic_offset_count;

View File

@ -1873,6 +1873,9 @@ struct radv_pipeline {
/* Last pre-PS API stage */
gl_shader_stage last_vgt_api_stage;
/* Whether the pipeline forces per-vertex VRS (GFX10.3+). */
bool force_vrs_per_vertex;
} graphics;
struct {
struct radv_pipeline_group_handle *rt_group_handles;

View File

@ -461,37 +461,10 @@ radv_force_primitive_shading_rate(nir_shader *nir, struct radv_device *device)
{
nir_function_impl *impl = nir_shader_get_entrypoint(nir);
bool progress = false;
unsigned vrs_rate = 0;
nir_builder b;
nir_builder_init(&b, impl);
/* Bits [2:3] = VRS rate X
* Bits [4:5] = VRS rate Y
*
* The range is [-2, 1]. Values:
* 1: 2x coarser shading rate in that direction.
* 0: normal shading rate
* -1: 2x finer shading rate (sample shading, not directional)
* -2: 4x finer shading rate (sample shading, not directional)
*
* Sample shading can't go above 8 samples, so both numbers can't be -2
* at the same time.
*/
switch (device->force_vrs) {
case RADV_FORCE_VRS_2x2:
vrs_rate = (1u << 2) | (1u << 4);
break;
case RADV_FORCE_VRS_2x1:
vrs_rate = (1u << 2) | (0u << 4);
break;
case RADV_FORCE_VRS_1x2:
vrs_rate = (0u << 2) | (1u << 4);
break;
default:
unreachable("Invalid RADV_FORCE_VRS value");
}
nir_foreach_block_reverse(block, impl) {
nir_foreach_instr_reverse(instr, block) {
if (instr->type != nir_instr_type_intrinsic)
@ -520,9 +493,11 @@ radv_force_primitive_shading_rate(nir_shader *nir, struct radv_device *device)
var->data.location = VARYING_SLOT_PRIMITIVE_SHADING_RATE;
var->data.interpolation = INTERP_MODE_NONE;
nir_ssa_def *vrs_rates = nir_load_force_vrs_rates_amd(&b);
nir_ssa_def *pos_w = nir_channel(&b, intr->src[1].ssa, 3);
nir_ssa_def *val = nir_bcsel(&b, nir_fneu(&b, pos_w, nir_imm_float(&b, 1.0f)),
nir_imm_int(&b, vrs_rate), nir_imm_int(&b, 0));
vrs_rates, nir_imm_int(&b, 0));
nir_deref_instr *deref = nir_build_deref_var(&b, var);
nir_store_deref(&b, deref, val, 0x1);