aco: Use strong typing to model SW<->HW stage mappings

Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Acked-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7094>
Tony Wasserka 2020-10-05 17:50:37 +02:00 committed by Marge Bot
parent fdbc45d1d4
commit 86c227c10c
8 changed files with 174 additions and 115 deletions
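
The pattern applied throughout the diff: the old Stage type was a single uint16_t bitmask mixing software- and hardware-stage bits, so every query was a bitwise AND; the new Stage struct (see the aco_ir.h hunks below) keeps an SWStage mask plus a single HWStage enum value. A minimal before/after sketch, using only identifiers from this commit:

    /* before: SW and HW stage bits share one bitmask */
    if (program->stage & (hw_vs | hw_ngg_gs)) { /* ... */ }

    /* after: the HW stage is one enum value, SW stages remain a mask */
    if (program->stage.hw == HWStage::VS || program->stage.hw == HWStage::NGG_GS) { /* ... */ }
    if (program->stage.has(SWStage::VS)) { /* ... */ }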

View File

@@ -732,7 +732,7 @@ void fix_exports(asm_context& ctx, std::vector<uint32_t>& out, Program* program)
 {
 if ((*it)->format == Format::EXP) {
 Export_instruction* exp = static_cast<Export_instruction*>((*it).get());
-if (program->stage & (hw_vs | hw_ngg_gs)) {
+if (program->stage.hw == HWStage::VS || program->stage.hw == HWStage::NGG_GS) {
 if (exp->dest >= V_008DFC_SQ_EXP_POS && exp->dest <= (V_008DFC_SQ_EXP_POS + 3)) {
 exp->done = true;
 exported = true;
@@ -752,7 +752,8 @@ void fix_exports(asm_context& ctx, std::vector<uint32_t>& out, Program* program)
 if (!exported) {
 /* Abort in order to avoid a GPU hang. */
-aco_err(program, "Missing export in %s shader:", (program->stage & (hw_vs | hw_ngg_gs)) ? "vertex or NGG" : "fragment");
+bool is_vertex_or_ngg = (program->stage.hw == HWStage::VS || program->stage.hw == HWStage::NGG_GS);
+aco_err(program, "Missing export in %s shader:", is_vertex_or_ngg ? "vertex or NGG" : "fragment");
 aco_print_program(program, stderr);
 abort();
 }
@@ -917,7 +918,9 @@ unsigned emit_program(Program* program,
 {
 asm_context ctx(program);
-if (program->stage & (hw_vs | hw_fs | hw_ngg_gs))
+if (program->stage.hw == HWStage::VS ||
+    program->stage.hw == HWStage::FS ||
+    program->stage.hw == HWStage::NGG_GS)
 fix_exports(ctx, code, program);
 for (Block& block : program->blocks) {

View File

@@ -379,7 +379,7 @@ unsigned add_coupling_code(exec_ctx& ctx, Block* block,
 bld.insert(std::move(startpgm));
 /* exec seems to need to be manually initialized with combined shaders */
-if (util_bitcount(ctx.program->stage & sw_mask) > 1 || (ctx.program->stage & hw_ngg_gs)) {
+if (ctx.program->stage.num_sw_stages() > 1 || ctx.program->stage.hw == HWStage::NGG_GS) {
 bld.sop1(Builder::s_mov, bld.exec(Definition(exec_mask)), bld.lm == s2 ? Operand(UINT64_MAX) : Operand(UINT32_MAX));
 instructions[0]->definitions.pop_back();
 }
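
Here util_bitcount(stage & sw_mask) becomes Stage::num_sw_stages(), which counts the software stages merged into this shader. An illustrative check, assuming the Stage constants from the aco_ir.h hunk below:

    /* vertex_geometry_gs merges SWStage::VS and SWStage::GS into one HW stage */
    assert(vertex_geometry_gs.num_sw_stages() == 2); /* combined shader: init exec manually */
    assert(fragment_fs.num_sw_stages() == 1);        /* single-stage shader */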

View File

@@ -4252,7 +4252,7 @@ void visit_store_ls_or_es_output(isel_context *ctx, nir_intrinsic_instr *instr)
 if (ctx->stage == vertex_geometry_gs || ctx->stage == tess_eval_geometry_gs ||
     ctx->stage == ngg_vertex_geometry_gs || ctx->stage == ngg_tess_eval_geometry_gs) {
 /* GFX9+: ES stage is merged into GS, data is passed between them using LDS. */
-unsigned itemsize = (ctx->stage & sw_vs)
+unsigned itemsize = ctx->stage.has(SWStage::VS)
     ? ctx->program->info->vs.es_info.esgs_itemsize
     : ctx->program->info->tes.es_info.esgs_itemsize;
 Temp vertex_idx = thread_id_in_threadgroup(ctx);
@@ -4363,9 +4363,9 @@ void visit_store_output(isel_context *ctx, nir_intrinsic_instr *instr)
 isel_err(instr->src[1].ssa->parent_instr, "Unimplemented output offset instruction");
 abort();
 }
-} else if ((ctx->stage & (hw_ls | hw_es)) ||
+} else if ((ctx->stage.hw == HWStage::LS || ctx->stage.hw == HWStage::ES) ||
     (ctx->stage == vertex_tess_control_hs && ctx->shader->info.stage == MESA_SHADER_VERTEX) ||
-    ((ctx->stage & sw_gs) && ctx->shader->info.stage != MESA_SHADER_GEOMETRY)) {
+    (ctx->stage.has(SWStage::GS) && ctx->shader->info.stage != MESA_SHADER_GEOMETRY)) {
 visit_store_ls_or_es_output(ctx, instr);
 } else if (ctx->shader->info.stage == MESA_SHADER_TESS_CTRL) {
 visit_store_tcs_output(ctx, instr, false);
@@ -7588,7 +7588,10 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr)
 break;
 }
 case nir_intrinsic_load_view_index: {
-if (ctx->stage & (sw_vs | sw_gs | sw_tcs | sw_tes)) {
+if (ctx->stage.has(SWStage::VS) ||
+    ctx->stage.has(SWStage::GS) ||
+    ctx->stage.has(SWStage::TCS) ||
+    ctx->stage.has(SWStage::TES)) {
 Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
 bld.copy(Definition(dst), Operand(get_arg(ctx, ctx->args->ac.view_index)));
 break;
@@ -8348,21 +8351,21 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr)
 break;
 }
 case nir_intrinsic_emit_vertex_with_counter: {
-if (ctx->stage & hw_ngg_gs)
+if (ctx->stage.hw == HWStage::NGG_GS)
 ngg_visit_emit_vertex_with_counter(ctx, instr);
 else
 visit_emit_vertex_with_counter(ctx, instr);
 break;
 }
 case nir_intrinsic_end_primitive_with_counter: {
-if ((ctx->stage & hw_ngg_gs) == 0) {
+if (ctx->stage.hw != HWStage::NGG_GS) {
 unsigned stream = nir_intrinsic_stream_id(instr);
 bld.sopp(aco_opcode::s_sendmsg, bld.m0(ctx->gs_wave_id), -1, sendmsg_gs(true, false, stream));
 }
 break;
 }
 case nir_intrinsic_set_vertex_and_primitive_count: {
-if (ctx->stage & hw_ngg_gs)
+if (ctx->stage.hw == HWStage::NGG_GS)
 ngg_visit_set_vertex_and_primitive_count(ctx, instr);
 /* unused in the legacy pipeline, the HW keeps track of this for us */
 break;
@@ -10079,9 +10082,9 @@ static bool visit_cf_list(isel_context *ctx,
 static void export_vs_varying(isel_context *ctx, int slot, bool is_pos, int *next_pos)
 {
-assert(ctx->stage & (hw_vs | hw_ngg_gs));
+assert(ctx->stage.hw == HWStage::VS || ctx->stage.hw == HWStage::NGG_GS);
-int offset = ((ctx->stage & sw_tes) && !(ctx->stage & sw_gs))
+int offset = (ctx->stage.has(SWStage::TES) && !ctx->stage.has(SWStage::GS))
     ? ctx->program->info->tes.outinfo.vs_output_param_offset[slot]
     : ctx->program->info->vs.outinfo.vs_output_param_offset[slot];
 uint64_t mask = ctx->outputs.mask[slot];
@@ -10176,15 +10179,15 @@ static void create_export_phis(isel_context *ctx)
 static void create_vs_exports(isel_context *ctx)
 {
-assert(ctx->stage & (hw_vs | hw_ngg_gs));
+assert(ctx->stage.hw == HWStage::VS || ctx->stage.hw == HWStage::NGG_GS);
-radv_vs_output_info *outinfo = ((ctx->stage & sw_tes) && !(ctx->stage & sw_gs))
+radv_vs_output_info *outinfo = (ctx->stage.has(SWStage::TES) && !ctx->stage.has(SWStage::GS))
     ? &ctx->program->info->tes.outinfo
     : &ctx->program->info->vs.outinfo;
-if (outinfo->export_prim_id && !(ctx->stage & hw_ngg_gs)) {
+if (outinfo->export_prim_id && ctx->stage.hw != HWStage::NGG_GS) {
 ctx->outputs.mask[VARYING_SLOT_PRIMITIVE_ID] |= 0x1;
-if (ctx->stage & sw_tes)
+if (ctx->stage.has(SWStage::TES))
 ctx->outputs.temps[VARYING_SLOT_PRIMITIVE_ID * 4u] = get_arg(ctx, ctx->args->ac.tes_patch_id);
 else
 ctx->outputs.temps[VARYING_SLOT_PRIMITIVE_ID * 4u] = get_arg(ctx, ctx->args->vs_prim_id);
@@ -10646,7 +10649,7 @@ static void emit_stream_output(isel_context *ctx,
 Temp out[4];
 bool all_undef = true;
-assert(ctx->stage & hw_vs);
+assert(ctx->stage.hw == HWStage::VS);
 for (unsigned i = 0; i < num_comps; i++) {
 out[i] = ctx->outputs.temps[loc * 4 + start + i];
 all_undef = all_undef && !out[i].id();
@@ -11055,7 +11058,7 @@ void ngg_emit_sendmsg_gs_alloc_req(isel_context *ctx, Temp vtx_cnt = Temp(), Tem
 /* VS/TES: we infer the vertex and primitive count from arguments
  * GS: the caller needs to supply them
  */
-assert((ctx->stage & sw_gs)
+assert(ctx->stage.has(SWStage::GS)
     ? (vtx_cnt.id() && prm_cnt.id())
     : (!vtx_cnt.id() && !prm_cnt.id()));
@@ -11117,7 +11120,7 @@ void ngg_emit_prim_export(isel_context *ctx, unsigned num_vertices_per_primitive
 Builder bld(ctx->program, ctx->block);
 Temp prim_exp_arg;
-if (!(ctx->stage & sw_gs) && ctx->args->options->key.vs_common_out.as_ngg_passthrough)
+if (!ctx->stage.has(SWStage::GS) && ctx->args->options->key.vs_common_out.as_ngg_passthrough)
 prim_exp_arg = get_arg(ctx, ctx->args->gs_vtx_offset[0]);
 else
 prim_exp_arg = ngg_pack_prim_exp_arg(ctx, num_vertices_per_primitive, vtxindex, is_null);
@@ -11144,7 +11147,7 @@ void ngg_nogs_export_primitives(isel_context *ctx)
 constexpr unsigned max_vertices_per_primitive = 3;
 unsigned num_vertices_per_primitive = max_vertices_per_primitive;
-assert(!(ctx->stage & sw_gs));
+assert(!ctx->stage.has(SWStage::GS));
 if (ctx->stage == ngg_vertex_gs) {
 /* TODO: optimize for points & lines */
@@ -11711,10 +11714,10 @@ void select_program(Program *program,
 visit_cf_list(&ctx, &func->body);
-if (ctx.program->info->so.num_outputs && (ctx.stage & hw_vs))
+if (ctx.program->info->so.num_outputs && ctx.stage.hw == HWStage::VS)
 emit_streamout(&ctx, 0);
-if (ctx.stage & hw_vs) {
+if (ctx.stage.hw == HWStage::VS) {
 create_vs_exports(&ctx);
 ctx.block->kind |= block_kind_export_end;
 } else if (ngg_no_gs && ctx.ngg_nogs_early_prim_export) {
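
Note the two distinct queries this file now uses: ctx->stage.hw == HWStage::NGG_GS asks whether the shader runs on the NGG hardware path, while ctx->stage.has(SWStage::GS) asks whether an API geometry shader is part of the merged shader. The two are independent; a sketch of the combinations, assuming the constants from the aco_ir.h hunk below:

    /* NGG VS without an API GS: hw is NGG_GS, but no SWStage::GS */
    assert(ngg_vertex_gs.hw == HWStage::NGG_GS && !ngg_vertex_gs.has(SWStage::GS));
    /* GFX9 legacy VS+GS: API GS present, but hw is the legacy GS stage */
    assert(vertex_geometry_gs.has(SWStage::GS) && vertex_geometry_gs.hw == HWStage::GS);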

View File

@@ -61,7 +61,7 @@ struct isel_context {
 Block *block;
 std::unique_ptr<Temp[]> allocated;
 std::unordered_map<unsigned, std::array<Temp,NIR_MAX_VEC_COMPONENTS>> allocated_vec;
-Stage stage; /* Stage */
+Stage stage;
 bool has_gfx10_wave64_bpermute = false;
 struct {
 bool has_branch;

View File

@@ -435,7 +435,7 @@ setup_vs_variables(isel_context *ctx, nir_shader *nir)
 ctx->options->key.vs_common_out.export_clip_dists, outinfo);
 /* TODO: NGG streamout */
-if (ctx->stage & hw_ngg_gs)
+if (ctx->stage.hw == HWStage::NGG_GS)
 assert(!ctx->args->shader_info->so.num_outputs);
 /* TODO: check if the shader writes edge flags (not in Vulkan) */
@@ -481,9 +481,9 @@ void setup_gs_variables(isel_context *ctx, nir_shader *nir)
 ctx->ngg_gs_early_alloc = ctx->ngg_gs_const_vtxcnt[0] == nir->info.gs.vertices_out && ctx->ngg_gs_const_prmcnt[0] != -1;
 }
-if (ctx->stage & sw_vs)
+if (ctx->stage.has(SWStage::VS))
 ctx->program->info->gs.es_type = MESA_SHADER_VERTEX;
-else if (ctx->stage & sw_tes)
+else if (ctx->stage.has(SWStage::TES))
 ctx->program->info->gs.es_type = MESA_SHADER_TESS_EVAL;
 }
@@ -550,7 +550,7 @@ setup_tes_variables(isel_context *ctx, nir_shader *nir)
 ctx->options->key.vs_common_out.export_clip_dists, outinfo);
 /* TODO: NGG streamout */
-if (ctx->stage & hw_ngg_gs)
+if (ctx->stage.hw == HWStage::NGG_GS)
 assert(!ctx->args->shader_info->so.num_outputs);
 /* Tess eval shaders can't write edge flags, so this can be always true. */
@@ -644,7 +644,7 @@ void init_context(isel_context *ctx, nir_shader *shader)
 /* we'll need this for isel */
 nir_metadata_require(impl, nir_metadata_block_index);
-if (!(ctx->stage & sw_gs_copy) && ctx->options->dump_preoptir) {
+if (!ctx->stage.has(SWStage::GSCopy) && ctx->options->dump_preoptir) {
 fprintf(stderr, "NIR shader before instruction selection:\n");
 nir_print_shader(shader, stderr);
 }
@@ -1022,26 +1022,26 @@ setup_isel_context(Program* program,
 struct radv_shader_args *args,
 bool is_gs_copy_shader)
 {
-Stage stage = 0;
+SWStage sw_stage = SWStage::None;
 for (unsigned i = 0; i < shader_count; i++) {
 switch (shaders[i]->info.stage) {
 case MESA_SHADER_VERTEX:
-stage |= sw_vs;
+sw_stage = sw_stage | SWStage::VS;
 break;
 case MESA_SHADER_TESS_CTRL:
-stage |= sw_tcs;
+sw_stage = sw_stage | SWStage::TCS;
 break;
 case MESA_SHADER_TESS_EVAL:
-stage |= sw_tes;
+sw_stage = sw_stage | SWStage::TES;
 break;
 case MESA_SHADER_GEOMETRY:
-stage |= is_gs_copy_shader ? sw_gs_copy : sw_gs;
+sw_stage = sw_stage | (is_gs_copy_shader ? SWStage::GSCopy : SWStage::GS);
 break;
 case MESA_SHADER_FRAGMENT:
-stage |= sw_fs;
+sw_stage = sw_stage | SWStage::FS;
 break;
 case MESA_SHADER_COMPUTE:
-stage |= sw_cs;
+sw_stage = sw_stage | SWStage::CS;
 break;
 default:
 unreachable("Shader stage not implemented");
@@ -1049,44 +1049,45 @@ setup_isel_context(Program* program,
 }
 bool gfx9_plus = args->options->chip_class >= GFX9;
 bool ngg = args->shader_info->is_ngg && args->options->chip_class >= GFX10;
-if (stage == sw_vs && args->shader_info->vs.as_es && !ngg)
-   stage |= hw_es;
-else if (stage == sw_vs && !args->shader_info->vs.as_ls && !ngg)
-   stage |= hw_vs;
-else if (stage == sw_vs && ngg)
-   stage |= hw_ngg_gs; /* GFX10/NGG: VS without GS uses the HW GS stage */
-else if (stage == sw_gs)
-   stage |= hw_gs;
-else if (stage == sw_fs)
-   stage |= hw_fs;
-else if (stage == sw_cs)
-   stage |= hw_cs;
-else if (stage == sw_gs_copy)
-   stage |= hw_vs;
-else if (stage == (sw_vs | sw_gs) && gfx9_plus && !ngg)
-   stage |= hw_gs; /* GFX6-9: VS+GS merged into a GS (and GFX10/legacy) */
-else if (stage == (sw_vs | sw_gs) && ngg)
-   stage |= hw_ngg_gs; /* GFX10+: VS+GS merged into an NGG GS */
-else if (stage == sw_vs && args->shader_info->vs.as_ls)
-   stage |= hw_ls; /* GFX6-8: VS is a Local Shader, when tessellation is used */
-else if (stage == sw_tcs)
-   stage |= hw_hs; /* GFX6-8: TCS is a Hull Shader */
-else if (stage == (sw_vs | sw_tcs))
-   stage |= hw_hs; /* GFX9-10: VS+TCS merged into a Hull Shader */
-else if (stage == sw_tes && !args->shader_info->tes.as_es && !ngg)
-   stage |= hw_vs; /* GFX6-9: TES without GS uses the HW VS stage (and GFX10/legacy) */
-else if (stage == sw_tes && !args->shader_info->tes.as_es && ngg)
-   stage |= hw_ngg_gs; /* GFX10/NGG: TES without GS uses the HW GS stage */
-else if (stage == sw_tes && args->shader_info->tes.as_es && !ngg)
-   stage |= hw_es; /* GFX6-8: TES is an Export Shader */
-else if (stage == (sw_tes | sw_gs) && gfx9_plus && !ngg)
-   stage |= hw_gs; /* GFX9: TES+GS merged into a GS (and GFX10/legacy) */
-else if (stage == (sw_tes | sw_gs) && ngg)
-   stage |= hw_ngg_gs; /* GFX10+: TES+GS merged into an NGG GS */
+HWStage hw_stage { };
+if (sw_stage == SWStage::VS && args->shader_info->vs.as_es && !ngg)
+   hw_stage = HWStage::ES;
+else if (sw_stage == SWStage::VS && !args->shader_info->vs.as_ls && !ngg)
+   hw_stage = HWStage::VS;
+else if (sw_stage == SWStage::VS && ngg)
+   hw_stage = HWStage::NGG_GS; /* GFX10/NGG: VS without GS uses the HW GS stage */
+else if (sw_stage == SWStage::GS)
+   hw_stage = HWStage::GS;
+else if (sw_stage == SWStage::FS)
+   hw_stage = HWStage::FS;
+else if (sw_stage == SWStage::CS)
+   hw_stage = HWStage::CS;
+else if (sw_stage == SWStage::GSCopy)
+   hw_stage = HWStage::VS;
+else if (sw_stage == SWStage::VS_GS && gfx9_plus && !ngg)
+   hw_stage = HWStage::GS; /* GFX6-9: VS+GS merged into a GS (and GFX10/legacy) */
+else if (sw_stage == SWStage::VS_GS && ngg)
+   hw_stage = HWStage::NGG_GS; /* GFX10+: VS+GS merged into an NGG GS */
+else if (sw_stage == SWStage::VS && args->shader_info->vs.as_ls)
+   hw_stage = HWStage::LS; /* GFX6-8: VS is a Local Shader, when tessellation is used */
+else if (sw_stage == SWStage::TCS)
+   hw_stage = HWStage::HS; /* GFX6-8: TCS is a Hull Shader */
+else if (sw_stage == SWStage::VS_TCS)
+   hw_stage = HWStage::HS; /* GFX9-10: VS+TCS merged into a Hull Shader */
+else if (sw_stage == SWStage::TES && !args->shader_info->tes.as_es && !ngg)
+   hw_stage = HWStage::VS; /* GFX6-9: TES without GS uses the HW VS stage (and GFX10/legacy) */
+else if (sw_stage == SWStage::TES && !args->shader_info->tes.as_es && ngg)
+   hw_stage = HWStage::NGG_GS; /* GFX10/NGG: TES without GS uses the HW GS stage */
+else if (sw_stage == SWStage::TES && args->shader_info->tes.as_es && !ngg)
+   hw_stage = HWStage::ES; /* GFX6-8: TES is an Export Shader */
+else if (sw_stage == SWStage::TES_GS && gfx9_plus && !ngg)
+   hw_stage = HWStage::GS; /* GFX9: TES+GS merged into a GS (and GFX10/legacy) */
+else if (sw_stage == SWStage::TES_GS && ngg)
+   hw_stage = HWStage::NGG_GS; /* GFX10+: TES+GS merged into an NGG GS */
 else
 unreachable("Shader stage not implemented");
-init_program(program, stage, args->shader_info,
+init_program(program, Stage { hw_stage, sw_stage }, args->shader_info,
     args->options->chip_class, args->options->family, config);
 isel_context ctx = {};
@@ -1096,7 +1097,7 @@ setup_isel_context(Program* program,
 ctx.stage = program->stage;
 /* TODO: Check if we need to adjust min_waves for unknown workgroup sizes. */
-if (program->stage & (hw_vs | hw_fs)) {
+if (program->stage.hw == HWStage::VS || program->stage.hw == HWStage::FS) {
 /* PS and legacy VS have separate waves, no workgroups */
 program->workgroup_size = program->wave_size;
 } else if (program->stage == compute_cs) {
@@ -1104,10 +1105,10 @@ setup_isel_context(Program* program,
 program->workgroup_size = shaders[0]->info.cs.local_size[0] *
     shaders[0]->info.cs.local_size[1] *
     shaders[0]->info.cs.local_size[2];
-} else if ((program->stage & hw_es) || program->stage == geometry_gs) {
+} else if (program->stage.hw == HWStage::ES || program->stage == geometry_gs) {
 /* Unmerged ESGS operate in workgroups if on-chip GS (LDS rings) are enabled on GFX7-8 (not implemented in Mesa) */
 program->workgroup_size = program->wave_size;
-} else if (program->stage & hw_gs) {
+} else if (program->stage.hw == HWStage::GS) {
 /* If on-chip GS (LDS rings) are enabled on GFX9 or later, merged GS operates in workgroups */
 assert(program->chip_class >= GFX9);
 uint32_t es_verts_per_subgrp = G_028A44_ES_VERTS_PER_SUBGRP(program->info->gs_ring_info.vgt_gs_onchip_cntl);
@@ -1125,7 +1126,7 @@ setup_isel_context(Program* program,
 /* Merged LSHS operates in workgroups, but can still have a different number of LS and HS invocations */
 setup_tcs_info(&ctx, shaders[1], shaders[0]);
 program->workgroup_size = ctx.tcs_num_patches * MAX2(shaders[1]->info.tess.tcs_vertices_out, ctx.args->options->key.tcs.input_vertices);
-} else if (program->stage & hw_ngg_gs) {
+} else if (program->stage.hw == HWStage::NGG_GS) {
 gfx10_ngg_info &ngg_info = args->shader_info->ngg_info;
 /* Max ES (SW VS) threads */
View File

@@ -1500,50 +1500,101 @@ struct Block {
 Block() : index(0) {}
 };
-using Stage = uint16_t;
+/*
+ * Shader stages as provided in Vulkan by the application. Contrast this to HWStage.
+ */
+enum class SWStage : uint8_t {
+   None = 0,
+   VS = 1 << 0, /* Vertex Shader */
+   GS = 1 << 1, /* Geometry Shader */
+   TCS = 1 << 2, /* Tessellation Control aka Hull Shader */
+   TES = 1 << 3, /* Tessellation Evaluation aka Domain Shader */
+   FS = 1 << 4, /* Fragment aka Pixel Shader */
+   CS = 1 << 5, /* Compute Shader */
+   GSCopy = 1 << 6, /* GS Copy Shader (internal) */
-/* software stages */
-static constexpr Stage sw_vs = 1 << 0;
-static constexpr Stage sw_gs = 1 << 1;
-static constexpr Stage sw_tcs = 1 << 2;
-static constexpr Stage sw_tes = 1 << 3;
-static constexpr Stage sw_fs = 1 << 4;
-static constexpr Stage sw_cs = 1 << 5;
-static constexpr Stage sw_gs_copy = 1 << 6;
-static constexpr Stage sw_mask = 0x7f;
+   /* Stage combinations merged to run on a single HWStage */
+   VS_GS = VS | GS,
+   VS_TCS = VS | TCS,
+   TES_GS = TES | GS,
+};
-/* hardware stages (can't be OR'd, just a mask for convenience when testing multiple) */
-static constexpr Stage hw_vs = 1 << 7;
-static constexpr Stage hw_es = 1 << 8; /* Export shader: pre-GS (VS or TES) on GFX6-8. Combined into GS on GFX9 (and GFX10/legacy). */
-static constexpr Stage hw_gs = 1 << 9; /* Geometry shader on GFX10/legacy and GFX6-9. */
-static constexpr Stage hw_ngg_gs = 1 << 10; /* Geometry shader on GFX10/NGG. */
-static constexpr Stage hw_ls = 1 << 11; /* Local shader: pre-TCS (VS) on GFX6-8. Combined into HS on GFX9 (and GFX10/legacy). */
-static constexpr Stage hw_hs = 1 << 12; /* Hull shader: TCS on GFX6-8. Merged VS and TCS on GFX9-10. */
-static constexpr Stage hw_fs = 1 << 13;
-static constexpr Stage hw_cs = 1 << 14;
-static constexpr Stage hw_mask = 0xff << 7;
+constexpr SWStage operator|(SWStage a, SWStage b) {
+   return static_cast<SWStage>(static_cast<uint8_t>(a) | static_cast<uint8_t>(b));
+}
+/*
+ * Shader stages as running on the AMD GPU.
+ *
+ * The relation between HWStages and SWStages is not a one-to-one mapping:
+ * Some SWStages are merged by ACO to run on a single HWStage.
+ * See README.md for details.
+ */
+enum class HWStage : uint8_t {
+   VS,
+   ES, /* Export shader: pre-GS (VS or TES) on GFX6-8. Combined into GS on GFX9 (and GFX10/legacy). */
+   GS, /* Geometry shader on GFX10/legacy and GFX6-9. */
+   NGG_GS, /* Geometry shader on GFX10/NGG. */
+   LS, /* Local shader: pre-TCS (VS) on GFX6-8. Combined into HS on GFX9 (and GFX10/legacy). */
+   HS, /* Hull shader: TCS on GFX6-8. Merged VS and TCS on GFX9-10. */
+   FS,
+   CS,
+};
+/*
+ * Set of SWStages to be merged into a single shader paired with the
+ * HWStage it will run on.
+ */
+struct Stage {
+   constexpr Stage() = default;
+   explicit constexpr Stage(HWStage hw_, SWStage sw_) : sw(sw_), hw(hw_) { }
+   /* Check if the given SWStage is included */
+   constexpr bool has(SWStage stage) const {
+      return (static_cast<uint8_t>(sw) & static_cast<uint8_t>(stage));
+   }
+   unsigned num_sw_stages() const {
+      return util_bitcount(static_cast<uint8_t>(sw));
+   }
+   constexpr bool operator==(const Stage& other) const {
+      return sw == other.sw && hw == other.hw;
+   }
+   constexpr bool operator!=(const Stage& other) const {
+      return sw != other.sw || hw != other.hw;
+   }
+   /* Mask of merged software stages */
+   SWStage sw = SWStage::None;
+   /* Active hardware stage */
+   HWStage hw {};
+};
 /* possible settings of Program::stage */
-static constexpr Stage vertex_vs = sw_vs | hw_vs;
-static constexpr Stage fragment_fs = sw_fs | hw_fs;
-static constexpr Stage compute_cs = sw_cs | hw_cs;
-static constexpr Stage tess_eval_vs = sw_tes | hw_vs;
-static constexpr Stage gs_copy_vs = sw_gs_copy | hw_vs;
+static constexpr Stage vertex_vs(HWStage::VS, SWStage::VS);
+static constexpr Stage fragment_fs(HWStage::FS, SWStage::FS);
+static constexpr Stage compute_cs(HWStage::CS, SWStage::CS);
+static constexpr Stage tess_eval_vs(HWStage::VS, SWStage::TES);
+static constexpr Stage gs_copy_vs(HWStage::VS, SWStage::GSCopy);
 /* GFX10/NGG */
-static constexpr Stage ngg_vertex_gs = sw_vs | hw_ngg_gs;
-static constexpr Stage ngg_vertex_geometry_gs = sw_vs | sw_gs | hw_ngg_gs;
-static constexpr Stage ngg_tess_eval_gs = sw_tes | hw_ngg_gs;
-static constexpr Stage ngg_tess_eval_geometry_gs = sw_tes | sw_gs | hw_ngg_gs;
+static constexpr Stage ngg_vertex_gs(HWStage::NGG_GS, SWStage::VS);
+static constexpr Stage ngg_vertex_geometry_gs(HWStage::NGG_GS, SWStage::VS_GS);
+static constexpr Stage ngg_tess_eval_gs(HWStage::NGG_GS, SWStage::TES);
+static constexpr Stage ngg_tess_eval_geometry_gs(HWStage::NGG_GS, SWStage::TES_GS);
 /* GFX9 (and GFX10 if NGG isn't used) */
-static constexpr Stage vertex_geometry_gs = sw_vs | sw_gs | hw_gs;
-static constexpr Stage vertex_tess_control_hs = sw_vs | sw_tcs | hw_hs;
-static constexpr Stage tess_eval_geometry_gs = sw_tes | sw_gs | hw_gs;
+static constexpr Stage vertex_geometry_gs(HWStage::GS, SWStage::VS_GS);
+static constexpr Stage vertex_tess_control_hs(HWStage::HS, SWStage::VS_TCS);
+static constexpr Stage tess_eval_geometry_gs(HWStage::GS, SWStage::TES_GS);
 /* pre-GFX9 */
-static constexpr Stage vertex_ls = sw_vs | hw_ls; /* vertex before tesselation control */
-static constexpr Stage vertex_es = sw_vs | hw_es; /* vertex before geometry */
-static constexpr Stage tess_control_hs = sw_tcs | hw_hs;
-static constexpr Stage tess_eval_es = sw_tes | hw_es; /* tesselation evaluation before geometry */
-static constexpr Stage geometry_gs = sw_gs | hw_gs;
+static constexpr Stage vertex_ls(HWStage::LS, SWStage::VS); /* vertex before tesselation control */
+static constexpr Stage vertex_es(HWStage::ES, SWStage::VS); /* vertex before geometry */
+static constexpr Stage tess_control_hs(HWStage::HS, SWStage::TCS);
+static constexpr Stage tess_eval_es(HWStage::ES, SWStage::TES); /* tesselation evaluation before geometry */
+static constexpr Stage geometry_gs(HWStage::GS, SWStage::GS);
 enum statistic {
 statistic_hash,
@@ -1574,7 +1625,7 @@ public:
 enum radeon_family family;
 unsigned wave_size;
 RegClass lane_mask;
-Stage stage; /* Stage */
+Stage stage;
 bool needs_exact = false; /* there exists an instruction with disable_wqm = true */
 bool needs_wqm = false; /* there exists a p_wqm instruction */
 bool wb_smem_l1_on_end = false;
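
Taken together, a standalone sketch of how the new types behave. This is an approximation for illustration only: util_bitcount is replaced by GCC/Clang's __builtin_popcount and just a subset of the stages is reproduced, but the Stage logic mirrors the aco_ir.h hunk above.

    #include <cassert>
    #include <cstdint>

    enum class SWStage : uint8_t {
       None = 0,
       VS = 1 << 0,
       GS = 1 << 1,
       VS_GS = VS | GS, /* legal: enumerators have the underlying type until the closing brace */
    };
    enum class HWStage : uint8_t { VS, GS, NGG_GS };

    struct Stage {
       constexpr Stage() = default;
       explicit constexpr Stage(HWStage hw_, SWStage sw_) : sw(sw_), hw(hw_) { }
       constexpr bool has(SWStage s) const {
          return static_cast<uint8_t>(sw) & static_cast<uint8_t>(s);
       }
       unsigned num_sw_stages() const { return __builtin_popcount(static_cast<uint8_t>(sw)); }
       constexpr bool operator==(const Stage& o) const { return sw == o.sw && hw == o.hw; }
       SWStage sw = SWStage::None;
       HWStage hw {};
    };

    int main() {
       constexpr Stage vertex_geometry_gs(HWStage::GS, SWStage::VS_GS);
       assert(vertex_geometry_gs.has(SWStage::VS));     /* SW queries test mask membership */
       assert(vertex_geometry_gs.hw == HWStage::GS);    /* HW queries compare a single value */
       assert(vertex_geometry_gs.num_sw_stages() == 2); /* two SW stages merged into one shader */
       /* Unlike the old bitmask, mixing the two kinds of stage no longer
        * type-checks: e.g. "vertex_geometry_gs.hw == SWStage::VS" will not compile. */
       return 0;
    }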

View File

@@ -1726,7 +1726,7 @@ void lower_to_hw_instr(Program* program)
 /* don't bother with an early exit near the end of the program */
 if ((block->instructions.size() - 1 - j) <= 4 &&
     block->instructions.back()->opcode == aco_opcode::s_endpgm) {
-unsigned null_exp_dest = (ctx.program->stage & hw_fs) ? 9 /* NULL */ : V_008DFC_SQ_EXP_POS;
+unsigned null_exp_dest = (ctx.program->stage.hw == HWStage::FS) ? 9 /* NULL */ : V_008DFC_SQ_EXP_POS;
 bool ignore_early_exit = true;
 for (unsigned k = j + 1; k < block->instructions.size(); ++k) {

View File

@@ -862,7 +862,8 @@ void schedule_block(sched_ctx& ctx, Program *program, Block* block, live& live_v
 }
 }
-if ((program->stage & (hw_vs | hw_ngg_gs)) && (block->kind & block_kind_export_end)) {
+if ((program->stage.hw == HWStage::VS || program->stage.hw == HWStage::NGG_GS) &&
+    (block->kind & block_kind_export_end)) {
 /* Try to move position exports as far up as possible, to reduce register
  * usage and because ISA reference guides say so. */
 for (unsigned idx = 0; idx < block->instructions.size(); idx++) {