radv: allow to force VRS rates on GFX10.3 with RADV_FORCE_VRS
This allows to force the VRS rates via RADV_FORCE_VRS, the supported values are 2x2, 1x2 and 2x1. This supports the primitive shading rate mode for non GUI elements. Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7794>
This commit is contained in:
parent
549f41754a
commit
1ad295ed6f
|
@ -622,6 +622,12 @@ RADV driver environment variables
|
|||
|
||||
``RADV_FORCE_FAMILY``
|
||||
create a null device to compile shaders without an AMD GPU (e.g. vega10)
|
||||
|
||||
``RADV_FORCE_VRS``
|
||||
force per-pipeline vertex VRS rates on GFX10.3+. This is only
|
||||
forced for pipelines that don't explicitly use VRS or flat shading.
|
||||
The supported values are 2x2, 1x2 and 2x1. Only for testing purposes.
|
||||
|
||||
``RADV_PERFTEST``
|
||||
a comma-separated list of named flags, which do various things:
|
||||
|
||||
|
|
|
@ -10289,6 +10289,31 @@ static void export_vs_psiz_layer_viewport_vrs(isel_context *ctx, int *next_pos)
|
|||
|
||||
exp->operands[1] = Operand(out);
|
||||
exp->enabled_mask |= 0x2;
|
||||
} else if (ctx->options->force_vrs_rates) {
|
||||
/* Bits [2:3] = VRS rate X
|
||||
* Bits [4:5] = VRS rate Y
|
||||
*
|
||||
* The range is [-2, 1]. Values:
|
||||
* 1: 2x coarser shading rate in that direction.
|
||||
* 0: normal shading rate
|
||||
* -1: 2x finer shading rate (sample shading, not directional)
|
||||
* -2: 4x finer shading rate (sample shading, not directional)
|
||||
*
|
||||
* Sample shading can't go above 8 samples, so both numbers can't be -2
|
||||
* at the same time.
|
||||
*/
|
||||
Builder bld(ctx->program, ctx->block);
|
||||
Temp rates = bld.copy(bld.def(v1), Operand((unsigned)ctx->options->force_vrs_rates));
|
||||
|
||||
/* If Pos.W != 1 (typical for non-GUI elements), use 2x2 coarse shading. */
|
||||
Temp cond = bld.vopc(aco_opcode::v_cmp_neq_f32, bld.def(bld.lm),
|
||||
Operand(0x3f800000u),
|
||||
Operand(ctx->outputs.temps[VARYING_SLOT_POS + 3]));
|
||||
rates = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1),
|
||||
bld.copy(bld.def(v1), Operand(0u)), rates, cond);
|
||||
|
||||
exp->operands[1] = Operand(rates);
|
||||
exp->enabled_mask |= 0x2;
|
||||
}
|
||||
|
||||
exp->valid_mask = ctx->options->chip_class == GFX10 && *next_pos == 0;
|
||||
|
@ -10354,8 +10379,11 @@ static void create_vs_exports(isel_context *ctx)
|
|||
/* the order these position exports are created is important */
|
||||
int next_pos = 0;
|
||||
export_vs_varying(ctx, VARYING_SLOT_POS, true, &next_pos);
|
||||
|
||||
bool writes_primitive_shading_rate = outinfo->writes_primitive_shading_rate ||
|
||||
ctx->options->force_vrs_rates;
|
||||
if (outinfo->writes_pointsize || outinfo->writes_layer || outinfo->writes_viewport_index ||
|
||||
outinfo->writes_primitive_shading_rate) {
|
||||
writes_primitive_shading_rate) {
|
||||
export_vs_psiz_layer_viewport_vrs(ctx, &next_pos);
|
||||
}
|
||||
if (ctx->num_clip_distances + ctx->num_cull_distances > 0)
|
||||
|
|
|
@ -328,8 +328,10 @@ setup_vs_output_info(isel_context *ctx, nir_shader *nir,
|
|||
|
||||
outinfo->param_exports = 0;
|
||||
int pos_written = 0x1;
|
||||
bool writes_primitive_shading_rate = outinfo->writes_primitive_shading_rate ||
|
||||
ctx->options->force_vrs_rates;
|
||||
if (outinfo->writes_pointsize || outinfo->writes_viewport_index || outinfo->writes_layer ||
|
||||
outinfo->writes_primitive_shading_rate)
|
||||
writes_primitive_shading_rate)
|
||||
pos_written |= 1 << 1;
|
||||
|
||||
uint64_t mask = nir->info.outputs_written;
|
||||
|
|
|
@ -2861,7 +2861,8 @@ VkResult radv_CreateDevice(
|
|||
device->robust_buffer_access2 = robust_buffer_access2;
|
||||
|
||||
device->adjust_frag_coord_z = (vrs_enabled ||
|
||||
device->vk.enabled_extensions.KHR_fragment_shading_rate) &&
|
||||
device->vk.enabled_extensions.KHR_fragment_shading_rate ||
|
||||
device->force_vrs != RADV_FORCE_VRS_NONE) &&
|
||||
(device->physical_device->rad_info.family == CHIP_SIENNA_CICHLID ||
|
||||
device->physical_device->rad_info.family == CHIP_NAVY_FLOUNDER ||
|
||||
device->physical_device->rad_info.family == CHIP_VANGOGH);
|
||||
|
@ -3013,6 +3014,24 @@ VkResult radv_CreateDevice(
|
|||
goto fail;
|
||||
}
|
||||
|
||||
if (getenv("RADV_FORCE_VRS")) {
|
||||
const char *vrs_rates = getenv("RADV_FORCE_VRS");
|
||||
|
||||
if (device->physical_device->rad_info.chip_class < GFX10_3)
|
||||
fprintf(stderr, "radv: VRS is only supported on RDNA2+\n");
|
||||
else if (device->physical_device->use_llvm)
|
||||
fprintf(stderr, "radv: Forcing VRS rates is only supported with ACO\n");
|
||||
else if (!strcmp(vrs_rates, "2x2"))
|
||||
device->force_vrs = RADV_FORCE_VRS_2x2;
|
||||
else if (!strcmp(vrs_rates, "2x1"))
|
||||
device->force_vrs = RADV_FORCE_VRS_2x1;
|
||||
else if (!strcmp(vrs_rates, "1x2"))
|
||||
device->force_vrs = RADV_FORCE_VRS_1x2;
|
||||
else
|
||||
fprintf(stderr, "radv: Invalid VRS rates specified "
|
||||
"(valid values are 2x2, 2x1 and 1x2)\n");
|
||||
}
|
||||
|
||||
device->keep_shader_info = keep_shader_info;
|
||||
result = radv_device_init_meta(device);
|
||||
if (result != VK_SUCCESS)
|
||||
|
|
|
@ -227,6 +227,12 @@ static uint32_t get_hash_flags(const struct radv_device *device, bool stats)
|
|||
hash_flags |= RADV_HASH_SHADER_INVARIANT_GEOM;
|
||||
if (stats)
|
||||
hash_flags |= RADV_HASH_SHADER_KEEP_STATISTICS;
|
||||
/* Record which VRS rate (if any) is forced on this device in the hash
 * flags. Each flag must be set only when its own rate is the forced one;
 * when nothing is forced (RADV_FORCE_VRS_NONE) none of them is set.
 */
if (device->force_vrs == RADV_FORCE_VRS_2x2)
	hash_flags |= RADV_HASH_SHADER_FORCE_VRS_2x2;
if (device->force_vrs == RADV_FORCE_VRS_2x1)
	hash_flags |= RADV_HASH_SHADER_FORCE_VRS_2x1;
if (device->force_vrs == RADV_FORCE_VRS_1x2)
	hash_flags |= RADV_HASH_SHADER_FORCE_VRS_1x2;
|
||||
return hash_flags;
|
||||
}
|
||||
|
||||
|
@ -4438,10 +4444,13 @@ radv_pipeline_generate_hw_vs(struct radeon_cmdbuf *ctx_cs,
|
|||
clip_dist_mask = outinfo->clip_dist_mask;
|
||||
cull_dist_mask = outinfo->cull_dist_mask;
|
||||
total_mask = clip_dist_mask | cull_dist_mask;
|
||||
|
||||
bool writes_primitive_shading_rate = outinfo->writes_primitive_shading_rate ||
|
||||
pipeline->device->force_vrs != RADV_FORCE_VRS_NONE;
|
||||
bool misc_vec_ena = outinfo->writes_pointsize ||
|
||||
outinfo->writes_layer ||
|
||||
outinfo->writes_viewport_index ||
|
||||
outinfo->writes_primitive_shading_rate;
|
||||
writes_primitive_shading_rate;
|
||||
unsigned spi_vs_out_config, nparams;
|
||||
|
||||
/* VS is required to export at least one param. */
|
||||
|
@ -4470,7 +4479,7 @@ radv_pipeline_generate_hw_vs(struct radeon_cmdbuf *ctx_cs,
|
|||
S_02881C_USE_VTX_POINT_SIZE(outinfo->writes_pointsize) |
|
||||
S_02881C_USE_VTX_RENDER_TARGET_INDX(outinfo->writes_layer) |
|
||||
S_02881C_USE_VTX_VIEWPORT_INDX(outinfo->writes_viewport_index) |
|
||||
S_02881C_USE_VTX_VRS_RATE(outinfo->writes_primitive_shading_rate) |
|
||||
S_02881C_USE_VTX_VRS_RATE(writes_primitive_shading_rate) |
|
||||
S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) |
|
||||
S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena) |
|
||||
S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) |
|
||||
|
@ -4545,10 +4554,13 @@ radv_pipeline_generate_hw_ngg(struct radeon_cmdbuf *ctx_cs,
|
|||
clip_dist_mask = outinfo->clip_dist_mask;
|
||||
cull_dist_mask = outinfo->cull_dist_mask;
|
||||
total_mask = clip_dist_mask | cull_dist_mask;
|
||||
|
||||
bool writes_primitive_shading_rate = outinfo->writes_primitive_shading_rate ||
|
||||
pipeline->device->force_vrs != RADV_FORCE_VRS_NONE;
|
||||
bool misc_vec_ena = outinfo->writes_pointsize ||
|
||||
outinfo->writes_layer ||
|
||||
outinfo->writes_viewport_index ||
|
||||
outinfo->writes_primitive_shading_rate;
|
||||
writes_primitive_shading_rate;
|
||||
bool es_enable_prim_id = outinfo->export_prim_id ||
|
||||
(es && es->info.uses_prim_id);
|
||||
bool break_wave_at_eoi = false;
|
||||
|
@ -4586,7 +4598,7 @@ radv_pipeline_generate_hw_ngg(struct radeon_cmdbuf *ctx_cs,
|
|||
S_02881C_USE_VTX_POINT_SIZE(outinfo->writes_pointsize) |
|
||||
S_02881C_USE_VTX_RENDER_TARGET_INDX(outinfo->writes_layer) |
|
||||
S_02881C_USE_VTX_VIEWPORT_INDX(outinfo->writes_viewport_index) |
|
||||
S_02881C_USE_VTX_VRS_RATE(outinfo->writes_primitive_shading_rate) |
|
||||
S_02881C_USE_VTX_VRS_RATE(writes_primitive_shading_rate) |
|
||||
S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) |
|
||||
S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena) |
|
||||
S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) |
|
||||
|
@ -5332,6 +5344,20 @@ gfx103_pipeline_generate_vrs_state(struct radeon_cmdbuf *ctx_cs,
|
|||
*/
|
||||
mode = V_028064_VRS_COMB_MODE_OVERRIDE;
|
||||
rate_x = rate_y = 1;
|
||||
} else if (pipeline->device->force_vrs != RADV_FORCE_VRS_NONE) {
|
||||
/* Force enable vertex VRS if requested by the user. */
|
||||
radeon_set_context_reg(ctx_cs, R_028848_PA_CL_VRS_CNTL,
|
||||
S_028848_SAMPLE_ITER_COMBINER_MODE(V_028848_VRS_COMB_MODE_OVERRIDE) |
|
||||
S_028848_VERTEX_RATE_COMBINER_MODE(V_028848_VRS_COMB_MODE_OVERRIDE));
|
||||
|
||||
/* If the shader is using discard, turn off coarse shading
|
||||
* because discard at 2x2 pixel granularity degrades quality
|
||||
* too much. MIN allows sample shading but not coarse shading.
|
||||
*/
|
||||
struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
|
||||
|
||||
mode = ps->info.ps.can_discard ? V_028064_VRS_COMB_MODE_MIN
|
||||
: V_028064_VRS_COMB_MODE_PASSTHRU;
|
||||
}
|
||||
|
||||
radeon_set_context_reg(ctx_cs, R_028A98_VGT_DRAW_PAYLOAD_CNTL,
|
||||
|
|
|
@ -731,6 +731,14 @@ struct radv_device_border_color_data {
|
|||
mtx_t mutex;
|
||||
};
|
||||
|
||||
/* VRS rate forced by the user through the RADV_FORCE_VRS environment
 * variable. RADV_FORCE_VRS_NONE means no rate is forced.
 */
enum radv_force_vrs
|
||||
{
|
||||
RADV_FORCE_VRS_NONE = 0, /* nothing forced */
|
||||
RADV_FORCE_VRS_2x2, /* RADV_FORCE_VRS=2x2 */
|
||||
RADV_FORCE_VRS_2x1, /* RADV_FORCE_VRS=2x1 */
|
||||
RADV_FORCE_VRS_1x2, /* RADV_FORCE_VRS=1x2 */
|
||||
};
|
||||
|
||||
struct radv_device {
|
||||
struct vk_device vk;
|
||||
|
||||
|
@ -823,6 +831,9 @@ struct radv_device {
|
|||
|
||||
/* Track the number of device loss occurs. */
|
||||
int lost;
|
||||
|
||||
/* Whether the user forced VRS rates on GFX10.3+. */
|
||||
enum radv_force_vrs force_vrs;
|
||||
};
|
||||
|
||||
VkResult _radv_device_set_lost(struct radv_device *device,
|
||||
|
@ -1643,6 +1654,9 @@ struct radv_event {
|
|||
#define RADV_HASH_SHADER_MRT_NAN_FIXUP (1 << 6)
|
||||
#define RADV_HASH_SHADER_INVARIANT_GEOM (1 << 7)
|
||||
#define RADV_HASH_SHADER_KEEP_STATISTICS (1 << 8)
|
||||
#define RADV_HASH_SHADER_FORCE_VRS_2x2 (1 << 9)
|
||||
#define RADV_HASH_SHADER_FORCE_VRS_2x1 (1 << 10)
|
||||
#define RADV_HASH_SHADER_FORCE_VRS_1x2 (1 << 11)
|
||||
|
||||
void
|
||||
radv_hash_shaders(unsigned char *hash,
|
||||
|
|
|
@ -1420,6 +1420,20 @@ shader_variant_compile(struct radv_device *device,
|
|||
options->debug.func = radv_compiler_debug;
|
||||
options->debug.private_data = &debug_data;
|
||||
|
||||
/* Translate the forced rate into the value the vertex stage exports:
 * bits [2:3] hold the X rate and bits [4:5] hold the Y rate, where 1 means
 * 2x coarser shading in that direction and 0 means the normal rate.
 * A "WxH" name is read as X rate W, Y rate H.
 */
switch (device->force_vrs) {
case RADV_FORCE_VRS_2x2:
	options->force_vrs_rates = (1u << 2) | (1u << 4);
	break;
case RADV_FORCE_VRS_2x1:
	/* 2x coarser in X, normal rate in Y. */
	options->force_vrs_rates = (1u << 2) | (0u << 4);
	break;
case RADV_FORCE_VRS_1x2:
	/* Normal rate in X, 2x coarser in Y. */
	options->force_vrs_rates = (0u << 2) | (1u << 4);
	break;
default:
	/* RADV_FORCE_VRS_NONE: force_vrs_rates is left as it was. */
	break;
}
|
||||
|
||||
struct radv_shader_args args = {0};
|
||||
args.options = options;
|
||||
args.shader_info = info;
|
||||
|
|
|
@ -142,6 +142,7 @@ struct radv_nir_compiler_options {
|
|||
const struct radeon_info *info;
|
||||
uint32_t tess_offchip_block_dw_size;
|
||||
uint32_t address32_hi;
|
||||
uint8_t force_vrs_rates;
|
||||
|
||||
struct {
|
||||
void (*func)(void *private_data,
|
||||
|
|
Loading…
Reference in New Issue