aco: implement fragment shading rate

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7837>
This commit is contained in:
Samuel Pitoiset 2020-11-23 16:01:00 +01:00 committed by Marge Bot
parent 0bac0b7f19
commit c587eaadf6
2 changed files with 70 additions and 4 deletions

View File

@ -4581,6 +4581,34 @@ void emit_load_frag_coord(isel_context *ctx, Temp dst, unsigned num_components)
return;
}
/**
 * Emit the instruction sequence that materializes the fragment shading rate
 * into dst.
 *
 * The hardware rate is read from the ancillary shader argument:
 *   - bits [2:3] hold the VRS rate along X,
 *   - bits [4:5] hold the VRS rate along Y.
 *
 * Each 2-bit field is translated to an API-level flag: a field equal to 0x1
 * becomes Horizontal2Pixels (4) for X or Vertical2Pixels (1) for Y, and any
 * other field value becomes None (0). The two flags are OR'ed into dst.
 *
 * \param ctx  instruction selection context; instructions are appended to
 *             ctx->block.
 * \param dst  temporary that receives the combined shading rate.
 */
void emit_load_frag_shading_rate(isel_context *ctx, Temp dst)
{
   Builder bld(ctx->program, ctx->block);

   /* Extract the 2-bit rate field that starts at bit_offset in the ancillary argument. */
   auto extract_hw_rate = [&](unsigned bit_offset) -> Temp {
      return bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1),
                      get_arg(ctx, ctx->args->ac.ancillary),
                      Operand(bit_offset), Operand(2u));
   };

   /* result = (hw_rate == 0x1) ? api_flag : 0 (None) */
   auto hw_rate_to_flag = [&](Temp hw_rate, unsigned api_flag) -> Temp {
      Temp is_2_pixels = bld.vopc(aco_opcode::v_cmp_eq_i32, bld.def(bld.lm),
                                  Operand(1u), Operand(hw_rate));
      return bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1),
                      bld.copy(bld.def(v1), Operand(0u)),
                      bld.copy(bld.def(v1), Operand(api_flag)), is_2_pixels);
   };

   /* Both bitfield extracts are emitted first, then the two selections. */
   Temp hw_rate_x = extract_hw_rate(2u); /* Ancillary[2:3] */
   Temp hw_rate_y = extract_hw_rate(4u); /* Ancillary[4:5] */

   Temp flag_x = hw_rate_to_flag(hw_rate_x, 4u); /* Horizontal2Pixels */
   Temp flag_y = hw_rate_to_flag(hw_rate_y, 1u); /* Vertical2Pixels */

   /* Merge the horizontal and vertical flags into the destination. */
   bld.vop2(aco_opcode::v_or_b32, Definition(dst), Operand(flag_x), Operand(flag_y));
}
void visit_load_interpolated_input(isel_context *ctx, nir_intrinsic_instr *instr)
{
Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
@ -7701,6 +7729,9 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr)
emit_load_frag_coord(ctx, get_ssa_temp(ctx, &instr->dest.ssa), 4);
break;
}
case nir_intrinsic_load_frag_shading_rate:
emit_load_frag_shading_rate(ctx, get_ssa_temp(ctx, &instr->dest.ssa));
break;
case nir_intrinsic_load_sample_pos: {
Temp posx = get_arg(ctx, ctx->args->ac.frag_pos[0]);
Temp posy = get_arg(ctx, ctx->args->ac.frag_pos[1]);
@ -10174,7 +10205,7 @@ static void export_vs_varying(isel_context *ctx, int slot, bool is_pos, int *nex
ctx->block->instructions.emplace_back(std::move(exp));
}
static void export_vs_psiz_layer_viewport(isel_context *ctx, int *next_pos)
static void export_vs_psiz_layer_viewport_vrs(isel_context *ctx, int *next_pos)
{
aco_ptr<Export_instruction> exp{create_instruction<Export_instruction>(aco_opcode::exp, Format::EXP, 4, 0)};
exp->enabled_mask = 0;
@ -10204,6 +10235,37 @@ static void export_vs_psiz_layer_viewport(isel_context *ctx, int *next_pos)
exp->enabled_mask |= 0x4;
}
}
if (ctx->outputs.mask[VARYING_SLOT_PRIMITIVE_SHADING_RATE]) {
Builder bld(ctx->program, ctx->block);
Temp cond;
/* xRate = (shadingRate & (Horizontal2Pixels | Horizontal4Pixels)) ? 0x1 : 0x0; */
Temp x_rate = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand(12u),
Operand(ctx->outputs.temps[VARYING_SLOT_PRIMITIVE_SHADING_RATE * 4u]));
cond = bld.vopc(aco_opcode::v_cmp_lg_u32, bld.def(bld.lm), Operand(0u), Operand(x_rate));
x_rate = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1),
bld.copy(bld.def(v1), Operand(0u)),
bld.copy(bld.def(v1), Operand(1u)), cond);
/* yRate = (shadingRate & (Vertical2Pixels | Vertical4Pixels)) ? 0x1 : 0x0; */
Temp y_rate = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand(3u),
Operand(ctx->outputs.temps[VARYING_SLOT_PRIMITIVE_SHADING_RATE * 4u]));
cond = bld.vopc(aco_opcode::v_cmp_lg_u32, bld.def(bld.lm), Operand(0u), Operand(y_rate));
y_rate = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1),
bld.copy(bld.def(v1), Operand(0u)),
bld.copy(bld.def(v1), Operand(1u)), cond);
/* Bits [2:3] = VRS rate X
* Bits [4:5] = VRS rate Y
* HW shading rate = (xRate << 2) | (yRate << 4)
*/
y_rate = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand(4u), Operand(y_rate));
Temp out = bld.vop3(aco_opcode::v_lshl_or_b32, bld.def(v1), Operand(x_rate), Operand(2u), Operand(y_rate));
exp->operands[1] = Operand(out);
exp->enabled_mask |= 0x2;
}
exp->valid_mask = ctx->options->chip_class == GFX10 && *next_pos == 0;
exp->done = false;
exp->compressed = false;
@ -10267,8 +10329,9 @@ static void create_vs_exports(isel_context *ctx)
/* the order these position exports are created is important */
int next_pos = 0;
export_vs_varying(ctx, VARYING_SLOT_POS, true, &next_pos);
if (outinfo->writes_pointsize || outinfo->writes_layer || outinfo->writes_viewport_index) {
export_vs_psiz_layer_viewport(ctx, &next_pos);
if (outinfo->writes_pointsize || outinfo->writes_layer || outinfo->writes_viewport_index ||
outinfo->writes_primitive_shading_rate) {
export_vs_psiz_layer_viewport_vrs(ctx, &next_pos);
}
if (ctx->num_clip_distances + ctx->num_cull_distances > 0)
export_vs_varying(ctx, VARYING_SLOT_CLIP_DIST0, true, &next_pos);

View File

@ -327,7 +327,8 @@ setup_vs_output_info(isel_context *ctx, nir_shader *nir,
outinfo->param_exports = 0;
int pos_written = 0x1;
if (outinfo->writes_pointsize || outinfo->writes_viewport_index || outinfo->writes_layer)
if (outinfo->writes_pointsize || outinfo->writes_viewport_index || outinfo->writes_layer ||
outinfo->writes_primitive_shading_rate)
pos_written |= 1 << 1;
uint64_t mask = nir->info.outputs_written;
@ -797,6 +798,7 @@ void init_context(isel_context *ctx, nir_shader *shader)
case nir_intrinsic_load_barycentric_at_offset:
case nir_intrinsic_load_interpolated_input:
case nir_intrinsic_load_frag_coord:
case nir_intrinsic_load_frag_shading_rate:
case nir_intrinsic_load_sample_pos:
case nir_intrinsic_load_layer_id:
case nir_intrinsic_load_local_invocation_id:
@ -910,6 +912,7 @@ void init_context(isel_context *ctx, nir_shader *shader)
break;
}
case nir_intrinsic_load_sample_id:
case nir_intrinsic_load_frag_shading_rate:
spi_ps_inputs |= S_0286CC_ANCILLARY_ENA(1);
break;
case nir_intrinsic_load_sample_mask_in: