radv: allow to force VRS rates on GFX10.3 with RADV_FORCE_VRS

This allows forcing the VRS rates via RADV_FORCE_VRS; the supported
values are 2x2, 1x2 and 2x1. This supports the primitive shading rate
mode for non-GUI elements.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7794>
This commit is contained in:
Samuel Pitoiset 2021-01-19 16:12:01 +01:00
parent 549f41754a
commit 1ad295ed6f
8 changed files with 117 additions and 7 deletions

View File

@ -622,6 +622,12 @@ RADV driver environment variables
``RADV_FORCE_FAMILY``
create a null device to compile shaders without an AMD GPU (e.g. vega10)
``RADV_FORCE_VRS``
force per-pipeline vertex VRS rates on GFX10.3+. This is only
forced for pipelines that don't explicitly use VRS or flat shading.
The supported values are 2x2, 1x2 and 2x1. Only for testing purposes.
``RADV_PERFTEST``
a comma-separated list of named flags, which do various things:

View File

@ -10289,6 +10289,31 @@ static void export_vs_psiz_layer_viewport_vrs(isel_context *ctx, int *next_pos)
exp->operands[1] = Operand(out);
exp->enabled_mask |= 0x2;
} else if (ctx->options->force_vrs_rates) {
/* Bits [2:3] = VRS rate X
* Bits [4:5] = VRS rate Y
*
* The range is [-2, 1]. Values:
* 1: 2x coarser shading rate in that direction.
* 0: normal shading rate
* -1: 2x finer shading rate (sample shading, not directional)
* -2: 4x finer shading rate (sample shading, not directional)
*
* Sample shading can't go above 8 samples, so both numbers can't be -2
* at the same time.
*/
Builder bld(ctx->program, ctx->block);
Temp rates = bld.copy(bld.def(v1), Operand((unsigned)ctx->options->force_vrs_rates));
/* If Pos.W != 1 (typical for non-GUI elements), use 2x2 coarse shading. */
Temp cond = bld.vopc(aco_opcode::v_cmp_neq_f32, bld.def(bld.lm),
Operand(0x3f800000u),
Operand(ctx->outputs.temps[VARYING_SLOT_POS + 3]));
rates = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1),
bld.copy(bld.def(v1), Operand(0u)), rates, cond);
exp->operands[1] = Operand(rates);
exp->enabled_mask |= 0x2;
}
exp->valid_mask = ctx->options->chip_class == GFX10 && *next_pos == 0;
@ -10354,8 +10379,11 @@ static void create_vs_exports(isel_context *ctx)
/* the order these position exports are created is important */
int next_pos = 0;
export_vs_varying(ctx, VARYING_SLOT_POS, true, &next_pos);
bool writes_primitive_shading_rate = outinfo->writes_primitive_shading_rate ||
ctx->options->force_vrs_rates;
if (outinfo->writes_pointsize || outinfo->writes_layer || outinfo->writes_viewport_index ||
outinfo->writes_primitive_shading_rate) {
writes_primitive_shading_rate) {
export_vs_psiz_layer_viewport_vrs(ctx, &next_pos);
}
if (ctx->num_clip_distances + ctx->num_cull_distances > 0)

View File

@ -328,8 +328,10 @@ setup_vs_output_info(isel_context *ctx, nir_shader *nir,
outinfo->param_exports = 0;
int pos_written = 0x1;
bool writes_primitive_shading_rate = outinfo->writes_primitive_shading_rate ||
ctx->options->force_vrs_rates;
if (outinfo->writes_pointsize || outinfo->writes_viewport_index || outinfo->writes_layer ||
outinfo->writes_primitive_shading_rate)
writes_primitive_shading_rate)
pos_written |= 1 << 1;
uint64_t mask = nir->info.outputs_written;

View File

@ -2861,7 +2861,8 @@ VkResult radv_CreateDevice(
device->robust_buffer_access2 = robust_buffer_access2;
device->adjust_frag_coord_z = (vrs_enabled ||
device->vk.enabled_extensions.KHR_fragment_shading_rate) &&
device->vk.enabled_extensions.KHR_fragment_shading_rate ||
device->force_vrs != RADV_FORCE_VRS_NONE) &&
(device->physical_device->rad_info.family == CHIP_SIENNA_CICHLID ||
device->physical_device->rad_info.family == CHIP_NAVY_FLOUNDER ||
device->physical_device->rad_info.family == CHIP_VANGOGH);
@ -3013,6 +3014,24 @@ VkResult radv_CreateDevice(
goto fail;
}
/* Optionally force VRS rates from the RADV_FORCE_VRS environment variable.
 * The checks run in order: chip generation, shader compiler backend, then
 * the requested rate string. Every rejected case only prints a message to
 * stderr and does not assign device->force_vrs.
 */
if (getenv("RADV_FORCE_VRS")) {
const char *vrs_rates = getenv("RADV_FORCE_VRS");
/* Forcing is refused on chips older than GFX10.3. */
if (device->physical_device->rad_info.chip_class < GFX10_3)
fprintf(stderr, "radv: VRS is only supported on RDNA2+\n");
/* Forcing is refused when the LLVM backend is in use. */
else if (device->physical_device->use_llvm)
fprintf(stderr, "radv: Forcing VRS rates is only supported with ACO\n");
/* Exact, case-sensitive match against the three supported rates. */
else if (!strcmp(vrs_rates, "2x2"))
device->force_vrs = RADV_FORCE_VRS_2x2;
else if (!strcmp(vrs_rates, "2x1"))
device->force_vrs = RADV_FORCE_VRS_2x1;
else if (!strcmp(vrs_rates, "1x2"))
device->force_vrs = RADV_FORCE_VRS_1x2;
else
fprintf(stderr, "radv: Invalid VRS rates specified "
"(valid values are 2x2, 2x1 and 1x2)\n");
}
device->keep_shader_info = keep_shader_info;
result = radv_device_init_meta(device);
if (result != VK_SUCCESS)

View File

@ -227,6 +227,12 @@ static uint32_t get_hash_flags(const struct radv_device *device, bool stats)
hash_flags |= RADV_HASH_SHADER_INVARIANT_GEOM;
if (stats)
hash_flags |= RADV_HASH_SHADER_KEEP_STATISTICS;
/* Mix the forced VRS rate into the shader hash flags. Each flag is set
 * only when its matching rate is the one being forced, so no FORCE_VRS
 * bit is set when device->force_vrs is RADV_FORCE_VRS_NONE.
 */
if (device->force_vrs == RADV_FORCE_VRS_2x2)
	hash_flags |= RADV_HASH_SHADER_FORCE_VRS_2x2;
if (device->force_vrs == RADV_FORCE_VRS_2x1)
	hash_flags |= RADV_HASH_SHADER_FORCE_VRS_2x1;
if (device->force_vrs == RADV_FORCE_VRS_1x2)
	hash_flags |= RADV_HASH_SHADER_FORCE_VRS_1x2;
return hash_flags;
}
@ -4438,10 +4444,13 @@ radv_pipeline_generate_hw_vs(struct radeon_cmdbuf *ctx_cs,
clip_dist_mask = outinfo->clip_dist_mask;
cull_dist_mask = outinfo->cull_dist_mask;
total_mask = clip_dist_mask | cull_dist_mask;
bool writes_primitive_shading_rate = outinfo->writes_primitive_shading_rate ||
pipeline->device->force_vrs != RADV_FORCE_VRS_NONE;
bool misc_vec_ena = outinfo->writes_pointsize ||
outinfo->writes_layer ||
outinfo->writes_viewport_index ||
outinfo->writes_primitive_shading_rate;
writes_primitive_shading_rate;
unsigned spi_vs_out_config, nparams;
/* VS is required to export at least one param. */
@ -4470,7 +4479,7 @@ radv_pipeline_generate_hw_vs(struct radeon_cmdbuf *ctx_cs,
S_02881C_USE_VTX_POINT_SIZE(outinfo->writes_pointsize) |
S_02881C_USE_VTX_RENDER_TARGET_INDX(outinfo->writes_layer) |
S_02881C_USE_VTX_VIEWPORT_INDX(outinfo->writes_viewport_index) |
S_02881C_USE_VTX_VRS_RATE(outinfo->writes_primitive_shading_rate) |
S_02881C_USE_VTX_VRS_RATE(writes_primitive_shading_rate) |
S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) |
S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena) |
S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) |
@ -4545,10 +4554,13 @@ radv_pipeline_generate_hw_ngg(struct radeon_cmdbuf *ctx_cs,
clip_dist_mask = outinfo->clip_dist_mask;
cull_dist_mask = outinfo->cull_dist_mask;
total_mask = clip_dist_mask | cull_dist_mask;
bool writes_primitive_shading_rate = outinfo->writes_primitive_shading_rate ||
pipeline->device->force_vrs != RADV_FORCE_VRS_NONE;
bool misc_vec_ena = outinfo->writes_pointsize ||
outinfo->writes_layer ||
outinfo->writes_viewport_index ||
outinfo->writes_primitive_shading_rate;
writes_primitive_shading_rate;
bool es_enable_prim_id = outinfo->export_prim_id ||
(es && es->info.uses_prim_id);
bool break_wave_at_eoi = false;
@ -4586,7 +4598,7 @@ radv_pipeline_generate_hw_ngg(struct radeon_cmdbuf *ctx_cs,
S_02881C_USE_VTX_POINT_SIZE(outinfo->writes_pointsize) |
S_02881C_USE_VTX_RENDER_TARGET_INDX(outinfo->writes_layer) |
S_02881C_USE_VTX_VIEWPORT_INDX(outinfo->writes_viewport_index) |
S_02881C_USE_VTX_VRS_RATE(outinfo->writes_primitive_shading_rate) |
S_02881C_USE_VTX_VRS_RATE(writes_primitive_shading_rate) |
S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) |
S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena) |
S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) |
@ -5332,6 +5344,20 @@ gfx103_pipeline_generate_vrs_state(struct radeon_cmdbuf *ctx_cs,
*/
mode = V_028064_VRS_COMB_MODE_OVERRIDE;
rate_x = rate_y = 1;
} else if (pipeline->device->force_vrs != RADV_FORCE_VRS_NONE) {
/* Force enable vertex VRS if requested by the user. */
radeon_set_context_reg(ctx_cs, R_028848_PA_CL_VRS_CNTL,
S_028848_SAMPLE_ITER_COMBINER_MODE(V_028848_VRS_COMB_MODE_OVERRIDE) |
S_028848_VERTEX_RATE_COMBINER_MODE(V_028848_VRS_COMB_MODE_OVERRIDE));
/* If the shader is using discard, turn off coarse shading
* because discard at 2x2 pixel granularity degrades quality
* too much. MIN allows sample shading but not coarse shading.
*/
struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
mode = ps->info.ps.can_discard ? V_028064_VRS_COMB_MODE_MIN
: V_028064_VRS_COMB_MODE_PASSTHRU;
}
radeon_set_context_reg(ctx_cs, R_028A98_VGT_DRAW_PAYLOAD_CNTL,

View File

@ -731,6 +731,14 @@ struct radv_device_border_color_data {
mtx_t mutex;
};
/* VRS rate that the user asked the driver to force, if any. */
enum radv_force_vrs {
	/* No rate is forced. */
	RADV_FORCE_VRS_NONE = 0,
	/* Force a 2x2 rate. */
	RADV_FORCE_VRS_2x2 = 1,
	/* Force a 2x1 rate. */
	RADV_FORCE_VRS_2x1 = 2,
	/* Force a 1x2 rate. */
	RADV_FORCE_VRS_1x2 = 3,
};
struct radv_device {
struct vk_device vk;
@ -823,6 +831,9 @@ struct radv_device {
/* Track the number of device loss occurs. */
int lost;
/* Whether the user forced VRS rates on GFX10.3+. */
enum radv_force_vrs force_vrs;
};
VkResult _radv_device_set_lost(struct radv_device *device,
@ -1643,6 +1654,9 @@ struct radv_event {
#define RADV_HASH_SHADER_MRT_NAN_FIXUP (1 << 6)
#define RADV_HASH_SHADER_INVARIANT_GEOM (1 << 7)
#define RADV_HASH_SHADER_KEEP_STATISTICS (1 << 8)
#define RADV_HASH_SHADER_FORCE_VRS_2x2 (1 << 9)
#define RADV_HASH_SHADER_FORCE_VRS_2x1 (1 << 10)
#define RADV_HASH_SHADER_FORCE_VRS_1x2 (1 << 11)
void
radv_hash_shaders(unsigned char *hash,

View File

@ -1420,6 +1420,20 @@ shader_variant_compile(struct radv_device *device,
options->debug.func = radv_compiler_debug;
options->debug.private_data = &debug_data;
/* Translate the forced VRS rate into the value exported by the vertex
 * shader. Bits [2:3] carry the X rate and bits [4:5] the Y rate; a value
 * of 1 requests 2x coarser shading in that direction and 0 keeps the
 * normal rate. "2x1" is therefore coarse in X only and "1x2" is coarse
 * in Y only.
 */
switch (device->force_vrs) {
case RADV_FORCE_VRS_2x2:
	options->force_vrs_rates = (1u << 2) | (1u << 4);
	break;
case RADV_FORCE_VRS_2x1:
	options->force_vrs_rates = (1u << 2) | (0u << 4);
	break;
case RADV_FORCE_VRS_1x2:
	options->force_vrs_rates = (0u << 2) | (1u << 4);
	break;
default:
	/* Nothing is forced: force_vrs_rates is left untouched. */
	break;
}
struct radv_shader_args args = {0};
args.options = options;
args.shader_info = info;

View File

@ -142,6 +142,7 @@ struct radv_nir_compiler_options {
const struct radeon_info *info;
uint32_t tess_offchip_block_dw_size;
uint32_t address32_hi;
uint8_t force_vrs_rates;
struct {
void (*func)(void *private_data,