aco: Use strong typing to model SW<->HW stage mappings
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com> Acked-by: Daniel Schürmann <daniel@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7094>
This commit is contained in:
parent
fdbc45d1d4
commit
86c227c10c
|
@ -732,7 +732,7 @@ void fix_exports(asm_context& ctx, std::vector<uint32_t>& out, Program* program)
|
||||||
{
|
{
|
||||||
if ((*it)->format == Format::EXP) {
|
if ((*it)->format == Format::EXP) {
|
||||||
Export_instruction* exp = static_cast<Export_instruction*>((*it).get());
|
Export_instruction* exp = static_cast<Export_instruction*>((*it).get());
|
||||||
if (program->stage & (hw_vs | hw_ngg_gs)) {
|
if (program->stage.hw == HWStage::VS || program->stage.hw == HWStage::NGG_GS) {
|
||||||
if (exp->dest >= V_008DFC_SQ_EXP_POS && exp->dest <= (V_008DFC_SQ_EXP_POS + 3)) {
|
if (exp->dest >= V_008DFC_SQ_EXP_POS && exp->dest <= (V_008DFC_SQ_EXP_POS + 3)) {
|
||||||
exp->done = true;
|
exp->done = true;
|
||||||
exported = true;
|
exported = true;
|
||||||
|
@ -752,7 +752,8 @@ void fix_exports(asm_context& ctx, std::vector<uint32_t>& out, Program* program)
|
||||||
|
|
||||||
if (!exported) {
|
if (!exported) {
|
||||||
/* Abort in order to avoid a GPU hang. */
|
/* Abort in order to avoid a GPU hang. */
|
||||||
aco_err(program, "Missing export in %s shader:", (program->stage & (hw_vs | hw_ngg_gs)) ? "vertex or NGG" : "fragment");
|
bool is_vertex_or_ngg = (program->stage.hw == HWStage::VS || program->stage.hw == HWStage::NGG_GS);
|
||||||
|
aco_err(program, "Missing export in %s shader:", is_vertex_or_ngg ? "vertex or NGG" : "fragment");
|
||||||
aco_print_program(program, stderr);
|
aco_print_program(program, stderr);
|
||||||
abort();
|
abort();
|
||||||
}
|
}
|
||||||
|
@ -917,7 +918,9 @@ unsigned emit_program(Program* program,
|
||||||
{
|
{
|
||||||
asm_context ctx(program);
|
asm_context ctx(program);
|
||||||
|
|
||||||
if (program->stage & (hw_vs | hw_fs | hw_ngg_gs))
|
if (program->stage.hw == HWStage::VS ||
|
||||||
|
program->stage.hw == HWStage::FS ||
|
||||||
|
program->stage.hw == HWStage::NGG_GS)
|
||||||
fix_exports(ctx, code, program);
|
fix_exports(ctx, code, program);
|
||||||
|
|
||||||
for (Block& block : program->blocks) {
|
for (Block& block : program->blocks) {
|
||||||
|
|
|
@ -379,7 +379,7 @@ unsigned add_coupling_code(exec_ctx& ctx, Block* block,
|
||||||
bld.insert(std::move(startpgm));
|
bld.insert(std::move(startpgm));
|
||||||
|
|
||||||
/* exec seems to need to be manually initialized with combined shaders */
|
/* exec seems to need to be manually initialized with combined shaders */
|
||||||
if (util_bitcount(ctx.program->stage & sw_mask) > 1 || (ctx.program->stage & hw_ngg_gs)) {
|
if (ctx.program->stage.num_sw_stages() > 1 || ctx.program->stage.hw == HWStage::NGG_GS) {
|
||||||
bld.sop1(Builder::s_mov, bld.exec(Definition(exec_mask)), bld.lm == s2 ? Operand(UINT64_MAX) : Operand(UINT32_MAX));
|
bld.sop1(Builder::s_mov, bld.exec(Definition(exec_mask)), bld.lm == s2 ? Operand(UINT64_MAX) : Operand(UINT32_MAX));
|
||||||
instructions[0]->definitions.pop_back();
|
instructions[0]->definitions.pop_back();
|
||||||
}
|
}
|
||||||
|
|
|
@ -4252,7 +4252,7 @@ void visit_store_ls_or_es_output(isel_context *ctx, nir_intrinsic_instr *instr)
|
||||||
if (ctx->stage == vertex_geometry_gs || ctx->stage == tess_eval_geometry_gs ||
|
if (ctx->stage == vertex_geometry_gs || ctx->stage == tess_eval_geometry_gs ||
|
||||||
ctx->stage == ngg_vertex_geometry_gs || ctx->stage == ngg_tess_eval_geometry_gs) {
|
ctx->stage == ngg_vertex_geometry_gs || ctx->stage == ngg_tess_eval_geometry_gs) {
|
||||||
/* GFX9+: ES stage is merged into GS, data is passed between them using LDS. */
|
/* GFX9+: ES stage is merged into GS, data is passed between them using LDS. */
|
||||||
unsigned itemsize = (ctx->stage & sw_vs)
|
unsigned itemsize = ctx->stage.has(SWStage::VS)
|
||||||
? ctx->program->info->vs.es_info.esgs_itemsize
|
? ctx->program->info->vs.es_info.esgs_itemsize
|
||||||
: ctx->program->info->tes.es_info.esgs_itemsize;
|
: ctx->program->info->tes.es_info.esgs_itemsize;
|
||||||
Temp vertex_idx = thread_id_in_threadgroup(ctx);
|
Temp vertex_idx = thread_id_in_threadgroup(ctx);
|
||||||
|
@ -4363,9 +4363,9 @@ void visit_store_output(isel_context *ctx, nir_intrinsic_instr *instr)
|
||||||
isel_err(instr->src[1].ssa->parent_instr, "Unimplemented output offset instruction");
|
isel_err(instr->src[1].ssa->parent_instr, "Unimplemented output offset instruction");
|
||||||
abort();
|
abort();
|
||||||
}
|
}
|
||||||
} else if ((ctx->stage & (hw_ls | hw_es)) ||
|
} else if ((ctx->stage.hw == HWStage::LS || ctx->stage.hw == HWStage::ES) ||
|
||||||
(ctx->stage == vertex_tess_control_hs && ctx->shader->info.stage == MESA_SHADER_VERTEX) ||
|
(ctx->stage == vertex_tess_control_hs && ctx->shader->info.stage == MESA_SHADER_VERTEX) ||
|
||||||
((ctx->stage & sw_gs) && ctx->shader->info.stage != MESA_SHADER_GEOMETRY)) {
|
(ctx->stage.has(SWStage::GS) && ctx->shader->info.stage != MESA_SHADER_GEOMETRY)) {
|
||||||
visit_store_ls_or_es_output(ctx, instr);
|
visit_store_ls_or_es_output(ctx, instr);
|
||||||
} else if (ctx->shader->info.stage == MESA_SHADER_TESS_CTRL) {
|
} else if (ctx->shader->info.stage == MESA_SHADER_TESS_CTRL) {
|
||||||
visit_store_tcs_output(ctx, instr, false);
|
visit_store_tcs_output(ctx, instr, false);
|
||||||
|
@ -7588,7 +7588,10 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case nir_intrinsic_load_view_index: {
|
case nir_intrinsic_load_view_index: {
|
||||||
if (ctx->stage & (sw_vs | sw_gs | sw_tcs | sw_tes)) {
|
if (ctx->stage.has(SWStage::VS) ||
|
||||||
|
ctx->stage.has(SWStage::GS) ||
|
||||||
|
ctx->stage.has(SWStage::TCS) ||
|
||||||
|
ctx->stage.has(SWStage::TES)) {
|
||||||
Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
|
Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
|
||||||
bld.copy(Definition(dst), Operand(get_arg(ctx, ctx->args->ac.view_index)));
|
bld.copy(Definition(dst), Operand(get_arg(ctx, ctx->args->ac.view_index)));
|
||||||
break;
|
break;
|
||||||
|
@ -8348,21 +8351,21 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case nir_intrinsic_emit_vertex_with_counter: {
|
case nir_intrinsic_emit_vertex_with_counter: {
|
||||||
if (ctx->stage & hw_ngg_gs)
|
if (ctx->stage.hw == HWStage::NGG_GS)
|
||||||
ngg_visit_emit_vertex_with_counter(ctx, instr);
|
ngg_visit_emit_vertex_with_counter(ctx, instr);
|
||||||
else
|
else
|
||||||
visit_emit_vertex_with_counter(ctx, instr);
|
visit_emit_vertex_with_counter(ctx, instr);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case nir_intrinsic_end_primitive_with_counter: {
|
case nir_intrinsic_end_primitive_with_counter: {
|
||||||
if ((ctx->stage & hw_ngg_gs) == 0) {
|
if (ctx->stage.hw != HWStage::NGG_GS) {
|
||||||
unsigned stream = nir_intrinsic_stream_id(instr);
|
unsigned stream = nir_intrinsic_stream_id(instr);
|
||||||
bld.sopp(aco_opcode::s_sendmsg, bld.m0(ctx->gs_wave_id), -1, sendmsg_gs(true, false, stream));
|
bld.sopp(aco_opcode::s_sendmsg, bld.m0(ctx->gs_wave_id), -1, sendmsg_gs(true, false, stream));
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case nir_intrinsic_set_vertex_and_primitive_count: {
|
case nir_intrinsic_set_vertex_and_primitive_count: {
|
||||||
if (ctx->stage & hw_ngg_gs)
|
if (ctx->stage.hw == HWStage::NGG_GS)
|
||||||
ngg_visit_set_vertex_and_primitive_count(ctx, instr);
|
ngg_visit_set_vertex_and_primitive_count(ctx, instr);
|
||||||
/* unused in the legacy pipeline, the HW keeps track of this for us */
|
/* unused in the legacy pipeline, the HW keeps track of this for us */
|
||||||
break;
|
break;
|
||||||
|
@ -10079,9 +10082,9 @@ static bool visit_cf_list(isel_context *ctx,
|
||||||
|
|
||||||
static void export_vs_varying(isel_context *ctx, int slot, bool is_pos, int *next_pos)
|
static void export_vs_varying(isel_context *ctx, int slot, bool is_pos, int *next_pos)
|
||||||
{
|
{
|
||||||
assert(ctx->stage & (hw_vs | hw_ngg_gs));
|
assert(ctx->stage.hw == HWStage::VS || ctx->stage.hw == HWStage::NGG_GS);
|
||||||
|
|
||||||
int offset = ((ctx->stage & sw_tes) && !(ctx->stage & sw_gs))
|
int offset = (ctx->stage.has(SWStage::TES) && !ctx->stage.has(SWStage::GS))
|
||||||
? ctx->program->info->tes.outinfo.vs_output_param_offset[slot]
|
? ctx->program->info->tes.outinfo.vs_output_param_offset[slot]
|
||||||
: ctx->program->info->vs.outinfo.vs_output_param_offset[slot];
|
: ctx->program->info->vs.outinfo.vs_output_param_offset[slot];
|
||||||
uint64_t mask = ctx->outputs.mask[slot];
|
uint64_t mask = ctx->outputs.mask[slot];
|
||||||
|
@ -10176,15 +10179,15 @@ static void create_export_phis(isel_context *ctx)
|
||||||
|
|
||||||
static void create_vs_exports(isel_context *ctx)
|
static void create_vs_exports(isel_context *ctx)
|
||||||
{
|
{
|
||||||
assert(ctx->stage & (hw_vs | hw_ngg_gs));
|
assert(ctx->stage.hw == HWStage::VS || ctx->stage.hw == HWStage::NGG_GS);
|
||||||
|
|
||||||
radv_vs_output_info *outinfo = ((ctx->stage & sw_tes) && !(ctx->stage & sw_gs))
|
radv_vs_output_info *outinfo = (ctx->stage.has(SWStage::TES) && !ctx->stage.has(SWStage::GS))
|
||||||
? &ctx->program->info->tes.outinfo
|
? &ctx->program->info->tes.outinfo
|
||||||
: &ctx->program->info->vs.outinfo;
|
: &ctx->program->info->vs.outinfo;
|
||||||
|
|
||||||
if (outinfo->export_prim_id && !(ctx->stage & hw_ngg_gs)) {
|
if (outinfo->export_prim_id && ctx->stage.hw != HWStage::NGG_GS) {
|
||||||
ctx->outputs.mask[VARYING_SLOT_PRIMITIVE_ID] |= 0x1;
|
ctx->outputs.mask[VARYING_SLOT_PRIMITIVE_ID] |= 0x1;
|
||||||
if (ctx->stage & sw_tes)
|
if (ctx->stage.has(SWStage::TES))
|
||||||
ctx->outputs.temps[VARYING_SLOT_PRIMITIVE_ID * 4u] = get_arg(ctx, ctx->args->ac.tes_patch_id);
|
ctx->outputs.temps[VARYING_SLOT_PRIMITIVE_ID * 4u] = get_arg(ctx, ctx->args->ac.tes_patch_id);
|
||||||
else
|
else
|
||||||
ctx->outputs.temps[VARYING_SLOT_PRIMITIVE_ID * 4u] = get_arg(ctx, ctx->args->vs_prim_id);
|
ctx->outputs.temps[VARYING_SLOT_PRIMITIVE_ID * 4u] = get_arg(ctx, ctx->args->vs_prim_id);
|
||||||
|
@ -10646,7 +10649,7 @@ static void emit_stream_output(isel_context *ctx,
|
||||||
|
|
||||||
Temp out[4];
|
Temp out[4];
|
||||||
bool all_undef = true;
|
bool all_undef = true;
|
||||||
assert(ctx->stage & hw_vs);
|
assert(ctx->stage.hw == HWStage::VS);
|
||||||
for (unsigned i = 0; i < num_comps; i++) {
|
for (unsigned i = 0; i < num_comps; i++) {
|
||||||
out[i] = ctx->outputs.temps[loc * 4 + start + i];
|
out[i] = ctx->outputs.temps[loc * 4 + start + i];
|
||||||
all_undef = all_undef && !out[i].id();
|
all_undef = all_undef && !out[i].id();
|
||||||
|
@ -11055,7 +11058,7 @@ void ngg_emit_sendmsg_gs_alloc_req(isel_context *ctx, Temp vtx_cnt = Temp(), Tem
|
||||||
/* VS/TES: we infer the vertex and primitive count from arguments
|
/* VS/TES: we infer the vertex and primitive count from arguments
|
||||||
* GS: the caller needs to supply them
|
* GS: the caller needs to supply them
|
||||||
*/
|
*/
|
||||||
assert((ctx->stage & sw_gs)
|
assert(ctx->stage.has(SWStage::GS)
|
||||||
? (vtx_cnt.id() && prm_cnt.id())
|
? (vtx_cnt.id() && prm_cnt.id())
|
||||||
: (!vtx_cnt.id() && !prm_cnt.id()));
|
: (!vtx_cnt.id() && !prm_cnt.id()));
|
||||||
|
|
||||||
|
@ -11117,7 +11120,7 @@ void ngg_emit_prim_export(isel_context *ctx, unsigned num_vertices_per_primitive
|
||||||
Builder bld(ctx->program, ctx->block);
|
Builder bld(ctx->program, ctx->block);
|
||||||
Temp prim_exp_arg;
|
Temp prim_exp_arg;
|
||||||
|
|
||||||
if (!(ctx->stage & sw_gs) && ctx->args->options->key.vs_common_out.as_ngg_passthrough)
|
if (!ctx->stage.has(SWStage::GS) && ctx->args->options->key.vs_common_out.as_ngg_passthrough)
|
||||||
prim_exp_arg = get_arg(ctx, ctx->args->gs_vtx_offset[0]);
|
prim_exp_arg = get_arg(ctx, ctx->args->gs_vtx_offset[0]);
|
||||||
else
|
else
|
||||||
prim_exp_arg = ngg_pack_prim_exp_arg(ctx, num_vertices_per_primitive, vtxindex, is_null);
|
prim_exp_arg = ngg_pack_prim_exp_arg(ctx, num_vertices_per_primitive, vtxindex, is_null);
|
||||||
|
@ -11144,7 +11147,7 @@ void ngg_nogs_export_primitives(isel_context *ctx)
|
||||||
constexpr unsigned max_vertices_per_primitive = 3;
|
constexpr unsigned max_vertices_per_primitive = 3;
|
||||||
unsigned num_vertices_per_primitive = max_vertices_per_primitive;
|
unsigned num_vertices_per_primitive = max_vertices_per_primitive;
|
||||||
|
|
||||||
assert(!(ctx->stage & sw_gs));
|
assert(!ctx->stage.has(SWStage::GS));
|
||||||
|
|
||||||
if (ctx->stage == ngg_vertex_gs) {
|
if (ctx->stage == ngg_vertex_gs) {
|
||||||
/* TODO: optimize for points & lines */
|
/* TODO: optimize for points & lines */
|
||||||
|
@ -11711,10 +11714,10 @@ void select_program(Program *program,
|
||||||
|
|
||||||
visit_cf_list(&ctx, &func->body);
|
visit_cf_list(&ctx, &func->body);
|
||||||
|
|
||||||
if (ctx.program->info->so.num_outputs && (ctx.stage & hw_vs))
|
if (ctx.program->info->so.num_outputs && ctx.stage.hw == HWStage::VS)
|
||||||
emit_streamout(&ctx, 0);
|
emit_streamout(&ctx, 0);
|
||||||
|
|
||||||
if (ctx.stage & hw_vs) {
|
if (ctx.stage.hw == HWStage::VS) {
|
||||||
create_vs_exports(&ctx);
|
create_vs_exports(&ctx);
|
||||||
ctx.block->kind |= block_kind_export_end;
|
ctx.block->kind |= block_kind_export_end;
|
||||||
} else if (ngg_no_gs && ctx.ngg_nogs_early_prim_export) {
|
} else if (ngg_no_gs && ctx.ngg_nogs_early_prim_export) {
|
||||||
|
|
|
@ -61,7 +61,7 @@ struct isel_context {
|
||||||
Block *block;
|
Block *block;
|
||||||
std::unique_ptr<Temp[]> allocated;
|
std::unique_ptr<Temp[]> allocated;
|
||||||
std::unordered_map<unsigned, std::array<Temp,NIR_MAX_VEC_COMPONENTS>> allocated_vec;
|
std::unordered_map<unsigned, std::array<Temp,NIR_MAX_VEC_COMPONENTS>> allocated_vec;
|
||||||
Stage stage; /* Stage */
|
Stage stage;
|
||||||
bool has_gfx10_wave64_bpermute = false;
|
bool has_gfx10_wave64_bpermute = false;
|
||||||
struct {
|
struct {
|
||||||
bool has_branch;
|
bool has_branch;
|
||||||
|
|
|
@ -435,7 +435,7 @@ setup_vs_variables(isel_context *ctx, nir_shader *nir)
|
||||||
ctx->options->key.vs_common_out.export_clip_dists, outinfo);
|
ctx->options->key.vs_common_out.export_clip_dists, outinfo);
|
||||||
|
|
||||||
/* TODO: NGG streamout */
|
/* TODO: NGG streamout */
|
||||||
if (ctx->stage & hw_ngg_gs)
|
if (ctx->stage.hw == HWStage::NGG_GS)
|
||||||
assert(!ctx->args->shader_info->so.num_outputs);
|
assert(!ctx->args->shader_info->so.num_outputs);
|
||||||
|
|
||||||
/* TODO: check if the shader writes edge flags (not in Vulkan) */
|
/* TODO: check if the shader writes edge flags (not in Vulkan) */
|
||||||
|
@ -481,9 +481,9 @@ void setup_gs_variables(isel_context *ctx, nir_shader *nir)
|
||||||
ctx->ngg_gs_early_alloc = ctx->ngg_gs_const_vtxcnt[0] == nir->info.gs.vertices_out && ctx->ngg_gs_const_prmcnt[0] != -1;
|
ctx->ngg_gs_early_alloc = ctx->ngg_gs_const_vtxcnt[0] == nir->info.gs.vertices_out && ctx->ngg_gs_const_prmcnt[0] != -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ctx->stage & sw_vs)
|
if (ctx->stage.has(SWStage::VS))
|
||||||
ctx->program->info->gs.es_type = MESA_SHADER_VERTEX;
|
ctx->program->info->gs.es_type = MESA_SHADER_VERTEX;
|
||||||
else if (ctx->stage & sw_tes)
|
else if (ctx->stage.has(SWStage::TES))
|
||||||
ctx->program->info->gs.es_type = MESA_SHADER_TESS_EVAL;
|
ctx->program->info->gs.es_type = MESA_SHADER_TESS_EVAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -550,7 +550,7 @@ setup_tes_variables(isel_context *ctx, nir_shader *nir)
|
||||||
ctx->options->key.vs_common_out.export_clip_dists, outinfo);
|
ctx->options->key.vs_common_out.export_clip_dists, outinfo);
|
||||||
|
|
||||||
/* TODO: NGG streamout */
|
/* TODO: NGG streamout */
|
||||||
if (ctx->stage & hw_ngg_gs)
|
if (ctx->stage.hw == HWStage::NGG_GS)
|
||||||
assert(!ctx->args->shader_info->so.num_outputs);
|
assert(!ctx->args->shader_info->so.num_outputs);
|
||||||
|
|
||||||
/* Tess eval shaders can't write edge flags, so this can be always true. */
|
/* Tess eval shaders can't write edge flags, so this can be always true. */
|
||||||
|
@ -644,7 +644,7 @@ void init_context(isel_context *ctx, nir_shader *shader)
|
||||||
/* we'll need this for isel */
|
/* we'll need this for isel */
|
||||||
nir_metadata_require(impl, nir_metadata_block_index);
|
nir_metadata_require(impl, nir_metadata_block_index);
|
||||||
|
|
||||||
if (!(ctx->stage & sw_gs_copy) && ctx->options->dump_preoptir) {
|
if (!ctx->stage.has(SWStage::GSCopy) && ctx->options->dump_preoptir) {
|
||||||
fprintf(stderr, "NIR shader before instruction selection:\n");
|
fprintf(stderr, "NIR shader before instruction selection:\n");
|
||||||
nir_print_shader(shader, stderr);
|
nir_print_shader(shader, stderr);
|
||||||
}
|
}
|
||||||
|
@ -1022,26 +1022,26 @@ setup_isel_context(Program* program,
|
||||||
struct radv_shader_args *args,
|
struct radv_shader_args *args,
|
||||||
bool is_gs_copy_shader)
|
bool is_gs_copy_shader)
|
||||||
{
|
{
|
||||||
Stage stage = 0;
|
SWStage sw_stage = SWStage::None;
|
||||||
for (unsigned i = 0; i < shader_count; i++) {
|
for (unsigned i = 0; i < shader_count; i++) {
|
||||||
switch (shaders[i]->info.stage) {
|
switch (shaders[i]->info.stage) {
|
||||||
case MESA_SHADER_VERTEX:
|
case MESA_SHADER_VERTEX:
|
||||||
stage |= sw_vs;
|
sw_stage = sw_stage | SWStage::VS;
|
||||||
break;
|
break;
|
||||||
case MESA_SHADER_TESS_CTRL:
|
case MESA_SHADER_TESS_CTRL:
|
||||||
stage |= sw_tcs;
|
sw_stage = sw_stage | SWStage::TCS;
|
||||||
break;
|
break;
|
||||||
case MESA_SHADER_TESS_EVAL:
|
case MESA_SHADER_TESS_EVAL:
|
||||||
stage |= sw_tes;
|
sw_stage = sw_stage | SWStage::TES;
|
||||||
break;
|
break;
|
||||||
case MESA_SHADER_GEOMETRY:
|
case MESA_SHADER_GEOMETRY:
|
||||||
stage |= is_gs_copy_shader ? sw_gs_copy : sw_gs;
|
sw_stage = sw_stage | (is_gs_copy_shader ? SWStage::GSCopy : SWStage::GS);
|
||||||
break;
|
break;
|
||||||
case MESA_SHADER_FRAGMENT:
|
case MESA_SHADER_FRAGMENT:
|
||||||
stage |= sw_fs;
|
sw_stage = sw_stage | SWStage::FS;
|
||||||
break;
|
break;
|
||||||
case MESA_SHADER_COMPUTE:
|
case MESA_SHADER_COMPUTE:
|
||||||
stage |= sw_cs;
|
sw_stage = sw_stage | SWStage::CS;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
unreachable("Shader stage not implemented");
|
unreachable("Shader stage not implemented");
|
||||||
|
@ -1049,44 +1049,45 @@ setup_isel_context(Program* program,
|
||||||
}
|
}
|
||||||
bool gfx9_plus = args->options->chip_class >= GFX9;
|
bool gfx9_plus = args->options->chip_class >= GFX9;
|
||||||
bool ngg = args->shader_info->is_ngg && args->options->chip_class >= GFX10;
|
bool ngg = args->shader_info->is_ngg && args->options->chip_class >= GFX10;
|
||||||
if (stage == sw_vs && args->shader_info->vs.as_es && !ngg)
|
HWStage hw_stage { };
|
||||||
stage |= hw_es;
|
if (sw_stage == SWStage::VS && args->shader_info->vs.as_es && !ngg)
|
||||||
else if (stage == sw_vs && !args->shader_info->vs.as_ls && !ngg)
|
hw_stage = HWStage::ES;
|
||||||
stage |= hw_vs;
|
else if (sw_stage == SWStage::VS && !args->shader_info->vs.as_ls && !ngg)
|
||||||
else if (stage == sw_vs && ngg)
|
hw_stage = HWStage::VS;
|
||||||
stage |= hw_ngg_gs; /* GFX10/NGG: VS without GS uses the HW GS stage */
|
else if (sw_stage == SWStage::VS && ngg)
|
||||||
else if (stage == sw_gs)
|
hw_stage = HWStage::NGG_GS; /* GFX10/NGG: VS without GS uses the HW GS stage */
|
||||||
stage |= hw_gs;
|
else if (sw_stage == SWStage::GS)
|
||||||
else if (stage == sw_fs)
|
hw_stage = HWStage::GS;
|
||||||
stage |= hw_fs;
|
else if (sw_stage == SWStage::FS)
|
||||||
else if (stage == sw_cs)
|
hw_stage = HWStage::FS;
|
||||||
stage |= hw_cs;
|
else if (sw_stage == SWStage::CS)
|
||||||
else if (stage == sw_gs_copy)
|
hw_stage = HWStage::CS;
|
||||||
stage |= hw_vs;
|
else if (sw_stage == SWStage::GSCopy)
|
||||||
else if (stage == (sw_vs | sw_gs) && gfx9_plus && !ngg)
|
hw_stage = HWStage::VS;
|
||||||
stage |= hw_gs; /* GFX6-9: VS+GS merged into a GS (and GFX10/legacy) */
|
else if (sw_stage == SWStage::VS_GS && gfx9_plus && !ngg)
|
||||||
else if (stage == (sw_vs | sw_gs) && ngg)
|
hw_stage = HWStage::GS; /* GFX6-9: VS+GS merged into a GS (and GFX10/legacy) */
|
||||||
stage |= hw_ngg_gs; /* GFX10+: VS+GS merged into an NGG GS */
|
else if (sw_stage == SWStage::VS_GS && ngg)
|
||||||
else if (stage == sw_vs && args->shader_info->vs.as_ls)
|
hw_stage = HWStage::NGG_GS; /* GFX10+: VS+GS merged into an NGG GS */
|
||||||
stage |= hw_ls; /* GFX6-8: VS is a Local Shader, when tessellation is used */
|
else if (sw_stage == SWStage::VS && args->shader_info->vs.as_ls)
|
||||||
else if (stage == sw_tcs)
|
hw_stage = HWStage::LS; /* GFX6-8: VS is a Local Shader, when tessellation is used */
|
||||||
stage |= hw_hs; /* GFX6-8: TCS is a Hull Shader */
|
else if (sw_stage == SWStage::TCS)
|
||||||
else if (stage == (sw_vs | sw_tcs))
|
hw_stage = HWStage::HS; /* GFX6-8: TCS is a Hull Shader */
|
||||||
stage |= hw_hs; /* GFX9-10: VS+TCS merged into a Hull Shader */
|
else if (sw_stage == SWStage::VS_TCS)
|
||||||
else if (stage == sw_tes && !args->shader_info->tes.as_es && !ngg)
|
hw_stage = HWStage::HS; /* GFX9-10: VS+TCS merged into a Hull Shader */
|
||||||
stage |= hw_vs; /* GFX6-9: TES without GS uses the HW VS stage (and GFX10/legacy) */
|
else if (sw_stage == SWStage::TES && !args->shader_info->tes.as_es && !ngg)
|
||||||
else if (stage == sw_tes && !args->shader_info->tes.as_es && ngg)
|
hw_stage = HWStage::VS; /* GFX6-9: TES without GS uses the HW VS stage (and GFX10/legacy) */
|
||||||
stage |= hw_ngg_gs; /* GFX10/NGG: TES without GS uses the HW GS stage */
|
else if (sw_stage == SWStage::TES && !args->shader_info->tes.as_es && ngg)
|
||||||
else if (stage == sw_tes && args->shader_info->tes.as_es && !ngg)
|
hw_stage = HWStage::NGG_GS; /* GFX10/NGG: TES without GS uses the HW GS stage */
|
||||||
stage |= hw_es; /* GFX6-8: TES is an Export Shader */
|
else if (sw_stage == SWStage::TES && args->shader_info->tes.as_es && !ngg)
|
||||||
else if (stage == (sw_tes | sw_gs) && gfx9_plus && !ngg)
|
hw_stage = HWStage::ES; /* GFX6-8: TES is an Export Shader */
|
||||||
stage |= hw_gs; /* GFX9: TES+GS merged into a GS (and GFX10/legacy) */
|
else if (sw_stage == SWStage::TES_GS && gfx9_plus && !ngg)
|
||||||
else if (stage == (sw_tes | sw_gs) && ngg)
|
hw_stage = HWStage::GS; /* GFX9: TES+GS merged into a GS (and GFX10/legacy) */
|
||||||
stage |= hw_ngg_gs; /* GFX10+: TES+GS merged into an NGG GS */
|
else if (sw_stage == SWStage::TES_GS && ngg)
|
||||||
|
hw_stage = HWStage::NGG_GS; /* GFX10+: TES+GS merged into an NGG GS */
|
||||||
else
|
else
|
||||||
unreachable("Shader stage not implemented");
|
unreachable("Shader stage not implemented");
|
||||||
|
|
||||||
init_program(program, stage, args->shader_info,
|
init_program(program, Stage { hw_stage, sw_stage }, args->shader_info,
|
||||||
args->options->chip_class, args->options->family, config);
|
args->options->chip_class, args->options->family, config);
|
||||||
|
|
||||||
isel_context ctx = {};
|
isel_context ctx = {};
|
||||||
|
@ -1096,7 +1097,7 @@ setup_isel_context(Program* program,
|
||||||
ctx.stage = program->stage;
|
ctx.stage = program->stage;
|
||||||
|
|
||||||
/* TODO: Check if we need to adjust min_waves for unknown workgroup sizes. */
|
/* TODO: Check if we need to adjust min_waves for unknown workgroup sizes. */
|
||||||
if (program->stage & (hw_vs | hw_fs)) {
|
if (program->stage.hw == HWStage::VS || program->stage.hw == HWStage::FS) {
|
||||||
/* PS and legacy VS have separate waves, no workgroups */
|
/* PS and legacy VS have separate waves, no workgroups */
|
||||||
program->workgroup_size = program->wave_size;
|
program->workgroup_size = program->wave_size;
|
||||||
} else if (program->stage == compute_cs) {
|
} else if (program->stage == compute_cs) {
|
||||||
|
@ -1104,10 +1105,10 @@ setup_isel_context(Program* program,
|
||||||
program->workgroup_size = shaders[0]->info.cs.local_size[0] *
|
program->workgroup_size = shaders[0]->info.cs.local_size[0] *
|
||||||
shaders[0]->info.cs.local_size[1] *
|
shaders[0]->info.cs.local_size[1] *
|
||||||
shaders[0]->info.cs.local_size[2];
|
shaders[0]->info.cs.local_size[2];
|
||||||
} else if ((program->stage & hw_es) || program->stage == geometry_gs) {
|
} else if (program->stage.hw == HWStage::ES || program->stage == geometry_gs) {
|
||||||
/* Unmerged ESGS operate in workgroups if on-chip GS (LDS rings) are enabled on GFX7-8 (not implemented in Mesa) */
|
/* Unmerged ESGS operate in workgroups if on-chip GS (LDS rings) are enabled on GFX7-8 (not implemented in Mesa) */
|
||||||
program->workgroup_size = program->wave_size;
|
program->workgroup_size = program->wave_size;
|
||||||
} else if (program->stage & hw_gs) {
|
} else if (program->stage.hw == HWStage::GS) {
|
||||||
/* If on-chip GS (LDS rings) are enabled on GFX9 or later, merged GS operates in workgroups */
|
/* If on-chip GS (LDS rings) are enabled on GFX9 or later, merged GS operates in workgroups */
|
||||||
assert(program->chip_class >= GFX9);
|
assert(program->chip_class >= GFX9);
|
||||||
uint32_t es_verts_per_subgrp = G_028A44_ES_VERTS_PER_SUBGRP(program->info->gs_ring_info.vgt_gs_onchip_cntl);
|
uint32_t es_verts_per_subgrp = G_028A44_ES_VERTS_PER_SUBGRP(program->info->gs_ring_info.vgt_gs_onchip_cntl);
|
||||||
|
@ -1125,7 +1126,7 @@ setup_isel_context(Program* program,
|
||||||
/* Merged LSHS operates in workgroups, but can still have a different number of LS and HS invocations */
|
/* Merged LSHS operates in workgroups, but can still have a different number of LS and HS invocations */
|
||||||
setup_tcs_info(&ctx, shaders[1], shaders[0]);
|
setup_tcs_info(&ctx, shaders[1], shaders[0]);
|
||||||
program->workgroup_size = ctx.tcs_num_patches * MAX2(shaders[1]->info.tess.tcs_vertices_out, ctx.args->options->key.tcs.input_vertices);
|
program->workgroup_size = ctx.tcs_num_patches * MAX2(shaders[1]->info.tess.tcs_vertices_out, ctx.args->options->key.tcs.input_vertices);
|
||||||
} else if (program->stage & hw_ngg_gs) {
|
} else if (program->stage.hw == HWStage::NGG_GS) {
|
||||||
gfx10_ngg_info &ngg_info = args->shader_info->ngg_info;
|
gfx10_ngg_info &ngg_info = args->shader_info->ngg_info;
|
||||||
|
|
||||||
/* Max ES (SW VS) threads */
|
/* Max ES (SW VS) threads */
|
||||||
|
|
|
@ -1500,50 +1500,101 @@ struct Block {
|
||||||
Block() : index(0) {}
|
Block() : index(0) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
using Stage = uint16_t;
|
/*
|
||||||
|
* Shader stages as provided in Vulkan by the application. Contrast this to HWStage.
|
||||||
|
*/
|
||||||
|
enum class SWStage : uint8_t {
|
||||||
|
None = 0,
|
||||||
|
VS = 1 << 0, /* Vertex Shader */
|
||||||
|
GS = 1 << 1, /* Geometry Shader */
|
||||||
|
TCS = 1 << 2, /* Tessellation Control aka Hull Shader */
|
||||||
|
TES = 1 << 3, /* Tessellation Evaluation aka Domain Shader */
|
||||||
|
FS = 1 << 4, /* Fragment aka Pixel Shader */
|
||||||
|
CS = 1 << 5, /* Compute Shader */
|
||||||
|
GSCopy = 1 << 6, /* GS Copy Shader (internal) */
|
||||||
|
|
||||||
/* software stages */
|
/* Stage combinations merged to run on a single HWStage */
|
||||||
static constexpr Stage sw_vs = 1 << 0;
|
VS_GS = VS | GS,
|
||||||
static constexpr Stage sw_gs = 1 << 1;
|
VS_TCS = VS | TCS,
|
||||||
static constexpr Stage sw_tcs = 1 << 2;
|
TES_GS = TES | GS,
|
||||||
static constexpr Stage sw_tes = 1 << 3;
|
};
|
||||||
static constexpr Stage sw_fs = 1 << 4;
|
|
||||||
static constexpr Stage sw_cs = 1 << 5;
|
|
||||||
static constexpr Stage sw_gs_copy = 1 << 6;
|
|
||||||
static constexpr Stage sw_mask = 0x7f;
|
|
||||||
|
|
||||||
/* hardware stages (can't be OR'd, just a mask for convenience when testing multiple) */
|
constexpr SWStage operator|(SWStage a, SWStage b) {
|
||||||
static constexpr Stage hw_vs = 1 << 7;
|
return static_cast<SWStage>(static_cast<uint8_t>(a) | static_cast<uint8_t>(b));
|
||||||
static constexpr Stage hw_es = 1 << 8; /* Export shader: pre-GS (VS or TES) on GFX6-8. Combined into GS on GFX9 (and GFX10/legacy). */
|
}
|
||||||
static constexpr Stage hw_gs = 1 << 9; /* Geometry shader on GFX10/legacy and GFX6-9. */
|
|
||||||
static constexpr Stage hw_ngg_gs = 1 << 10; /* Geometry shader on GFX10/NGG. */
|
/*
|
||||||
static constexpr Stage hw_ls = 1 << 11; /* Local shader: pre-TCS (VS) on GFX6-8. Combined into HS on GFX9 (and GFX10/legacy). */
|
* Shader stages as running on the AMD GPU.
|
||||||
static constexpr Stage hw_hs = 1 << 12; /* Hull shader: TCS on GFX6-8. Merged VS and TCS on GFX9-10. */
|
*
|
||||||
static constexpr Stage hw_fs = 1 << 13;
|
* The relation between HWStages and SWStages is not a one-to-one mapping:
|
||||||
static constexpr Stage hw_cs = 1 << 14;
|
* Some SWStages are merged by ACO to run on a single HWStage.
|
||||||
static constexpr Stage hw_mask = 0xff << 7;
|
* See README.md for details.
|
||||||
|
*/
|
||||||
|
enum class HWStage : uint8_t {
|
||||||
|
VS,
|
||||||
|
ES, /* Export shader: pre-GS (VS or TES) on GFX6-8. Combined into GS on GFX9 (and GFX10/legacy). */
|
||||||
|
GS, /* Geometry shader on GFX10/legacy and GFX6-9. */
|
||||||
|
NGG_GS, /* Geometry shader on GFX10/NGG. */
|
||||||
|
LS, /* Local shader: pre-TCS (VS) on GFX6-8. Combined into HS on GFX9 (and GFX10/legacy). */
|
||||||
|
HS, /* Hull shader: TCS on GFX6-8. Merged VS and TCS on GFX9-10. */
|
||||||
|
FS,
|
||||||
|
CS,
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Set of SWStages to be merged into a single shader paired with the
|
||||||
|
* HWStage it will run on.
|
||||||
|
*/
|
||||||
|
struct Stage {
|
||||||
|
constexpr Stage() = default;
|
||||||
|
|
||||||
|
explicit constexpr Stage(HWStage hw_, SWStage sw_) : sw(sw_), hw(hw_) { }
|
||||||
|
|
||||||
|
/* Check if the given SWStage is included */
|
||||||
|
constexpr bool has(SWStage stage) const {
|
||||||
|
return (static_cast<uint8_t>(sw) & static_cast<uint8_t>(stage));
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned num_sw_stages() const {
|
||||||
|
return util_bitcount(static_cast<uint8_t>(sw));
|
||||||
|
}
|
||||||
|
|
||||||
|
constexpr bool operator==(const Stage& other) const {
|
||||||
|
return sw == other.sw && hw == other.hw;
|
||||||
|
}
|
||||||
|
|
||||||
|
constexpr bool operator!=(const Stage& other) const {
|
||||||
|
return sw != other.sw || hw != other.hw;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Mask of merged software stages */
|
||||||
|
SWStage sw = SWStage::None;
|
||||||
|
|
||||||
|
/* Active hardware stage */
|
||||||
|
HWStage hw {};
|
||||||
|
};
|
||||||
|
|
||||||
/* possible settings of Program::stage */
|
/* possible settings of Program::stage */
|
||||||
static constexpr Stage vertex_vs = sw_vs | hw_vs;
|
static constexpr Stage vertex_vs(HWStage::VS, SWStage::VS);
|
||||||
static constexpr Stage fragment_fs = sw_fs | hw_fs;
|
static constexpr Stage fragment_fs(HWStage::FS, SWStage::FS);
|
||||||
static constexpr Stage compute_cs = sw_cs | hw_cs;
|
static constexpr Stage compute_cs(HWStage::CS, SWStage::CS);
|
||||||
static constexpr Stage tess_eval_vs = sw_tes | hw_vs;
|
static constexpr Stage tess_eval_vs(HWStage::VS, SWStage::TES);
|
||||||
static constexpr Stage gs_copy_vs = sw_gs_copy | hw_vs;
|
static constexpr Stage gs_copy_vs(HWStage::VS, SWStage::GSCopy);
|
||||||
/* GFX10/NGG */
|
/* GFX10/NGG */
|
||||||
static constexpr Stage ngg_vertex_gs = sw_vs | hw_ngg_gs;
|
static constexpr Stage ngg_vertex_gs(HWStage::NGG_GS, SWStage::VS);
|
||||||
static constexpr Stage ngg_vertex_geometry_gs = sw_vs | sw_gs | hw_ngg_gs;
|
static constexpr Stage ngg_vertex_geometry_gs(HWStage::NGG_GS, SWStage::VS_GS);
|
||||||
static constexpr Stage ngg_tess_eval_gs = sw_tes | hw_ngg_gs;
|
static constexpr Stage ngg_tess_eval_gs(HWStage::NGG_GS, SWStage::TES);
|
||||||
static constexpr Stage ngg_tess_eval_geometry_gs = sw_tes | sw_gs | hw_ngg_gs;
|
static constexpr Stage ngg_tess_eval_geometry_gs(HWStage::NGG_GS, SWStage::TES_GS);
|
||||||
/* GFX9 (and GFX10 if NGG isn't used) */
|
/* GFX9 (and GFX10 if NGG isn't used) */
|
||||||
static constexpr Stage vertex_geometry_gs = sw_vs | sw_gs | hw_gs;
|
static constexpr Stage vertex_geometry_gs(HWStage::GS, SWStage::VS_GS);
|
||||||
static constexpr Stage vertex_tess_control_hs = sw_vs | sw_tcs | hw_hs;
|
static constexpr Stage vertex_tess_control_hs(HWStage::HS, SWStage::VS_TCS);
|
||||||
static constexpr Stage tess_eval_geometry_gs = sw_tes | sw_gs | hw_gs;
|
static constexpr Stage tess_eval_geometry_gs(HWStage::GS, SWStage::TES_GS);
|
||||||
/* pre-GFX9 */
|
/* pre-GFX9 */
|
||||||
static constexpr Stage vertex_ls = sw_vs | hw_ls; /* vertex before tesselation control */
|
static constexpr Stage vertex_ls(HWStage::LS, SWStage::VS); /* vertex before tesselation control */
|
||||||
static constexpr Stage vertex_es = sw_vs | hw_es; /* vertex before geometry */
|
static constexpr Stage vertex_es(HWStage::ES, SWStage::VS); /* vertex before geometry */
|
||||||
static constexpr Stage tess_control_hs = sw_tcs | hw_hs;
|
static constexpr Stage tess_control_hs(HWStage::HS, SWStage::TCS);
|
||||||
static constexpr Stage tess_eval_es = sw_tes | hw_es; /* tesselation evaluation before geometry */
|
static constexpr Stage tess_eval_es(HWStage::ES, SWStage::TES); /* tesselation evaluation before geometry */
|
||||||
static constexpr Stage geometry_gs = sw_gs | hw_gs;
|
static constexpr Stage geometry_gs(HWStage::GS, SWStage::GS);
|
||||||
|
|
||||||
enum statistic {
|
enum statistic {
|
||||||
statistic_hash,
|
statistic_hash,
|
||||||
|
@ -1574,7 +1625,7 @@ public:
|
||||||
enum radeon_family family;
|
enum radeon_family family;
|
||||||
unsigned wave_size;
|
unsigned wave_size;
|
||||||
RegClass lane_mask;
|
RegClass lane_mask;
|
||||||
Stage stage; /* Stage */
|
Stage stage;
|
||||||
bool needs_exact = false; /* there exists an instruction with disable_wqm = true */
|
bool needs_exact = false; /* there exists an instruction with disable_wqm = true */
|
||||||
bool needs_wqm = false; /* there exists a p_wqm instruction */
|
bool needs_wqm = false; /* there exists a p_wqm instruction */
|
||||||
bool wb_smem_l1_on_end = false;
|
bool wb_smem_l1_on_end = false;
|
||||||
|
|
|
@ -1726,7 +1726,7 @@ void lower_to_hw_instr(Program* program)
|
||||||
/* don't bother with an early exit near the end of the program */
|
/* don't bother with an early exit near the end of the program */
|
||||||
if ((block->instructions.size() - 1 - j) <= 4 &&
|
if ((block->instructions.size() - 1 - j) <= 4 &&
|
||||||
block->instructions.back()->opcode == aco_opcode::s_endpgm) {
|
block->instructions.back()->opcode == aco_opcode::s_endpgm) {
|
||||||
unsigned null_exp_dest = (ctx.program->stage & hw_fs) ? 9 /* NULL */ : V_008DFC_SQ_EXP_POS;
|
unsigned null_exp_dest = (ctx.program->stage.hw == HWStage::FS) ? 9 /* NULL */ : V_008DFC_SQ_EXP_POS;
|
||||||
bool ignore_early_exit = true;
|
bool ignore_early_exit = true;
|
||||||
|
|
||||||
for (unsigned k = j + 1; k < block->instructions.size(); ++k) {
|
for (unsigned k = j + 1; k < block->instructions.size(); ++k) {
|
||||||
|
|
|
@ -862,7 +862,8 @@ void schedule_block(sched_ctx& ctx, Program *program, Block* block, live& live_v
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((program->stage & (hw_vs | hw_ngg_gs)) && (block->kind & block_kind_export_end)) {
|
if ((program->stage.hw == HWStage::VS || program->stage.hw == HWStage::NGG_GS) &&
|
||||||
|
(block->kind & block_kind_export_end)) {
|
||||||
/* Try to move position exports as far up as possible, to reduce register
|
/* Try to move position exports as far up as possible, to reduce register
|
||||||
* usage and because ISA reference guides say so. */
|
* usage and because ISA reference guides say so. */
|
||||||
for (unsigned idx = 0; idx < block->instructions.size(); idx++) {
|
for (unsigned idx = 0; idx < block->instructions.size(); idx++) {
|
||||||
|
|
Loading…
Reference in New Issue