diff --git a/src/amd/compiler/aco_assembler.cpp b/src/amd/compiler/aco_assembler.cpp index 7683c8f45af..9544fc35bd4 100644 --- a/src/amd/compiler/aco_assembler.cpp +++ b/src/amd/compiler/aco_assembler.cpp @@ -595,11 +595,11 @@ void emit_block(asm_context& ctx, std::vector& out, Block& block) void fix_exports(asm_context& ctx, std::vector& out, Program* program) { + bool exported = false; for (Block& block : program->blocks) { if (!(block.kind & block_kind_export_end)) continue; std::vector>::reverse_iterator it = block.instructions.rbegin(); - bool exported = false; while ( it != block.instructions.rend()) { if ((*it)->format == Format::EXP) { @@ -620,22 +620,13 @@ void fix_exports(asm_context& ctx, std::vector& out, Program* program) break; ++it; } - if (exported) - continue; - /* we didn't find an Export instruction and have to insert a null export */ - aco_ptr exp{create_instruction(aco_opcode::exp, Format::EXP, 4, 0)}; - for (unsigned i = 0; i < 4; i++) - exp->operands[i] = Operand(v1); - exp->enabled_mask = 0; - exp->compressed = false; - exp->done = true; - exp->valid_mask = (program->stage & hw_fs) || program->chip_class >= GFX10; - if (program->stage & hw_fs) - exp->dest = 9; /* NULL */ - else - exp->dest = V_008DFC_SQ_EXP_POS; - /* insert the null export 1 instruction before branch/endpgm */ - block.instructions.insert(block.instructions.end() - 1, std::move(exp)); + } + + if (!exported) { + /* Abort in order to avoid a GPU hang. */ + fprintf(stderr, "Missing export in %s shader:\n", (program->stage & hw_vs) ? "vertex" : "fragment"); + aco_print_program(program, stderr); + abort(); } } diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index a8b318c63c5..b9d203a6391 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -8981,7 +8981,20 @@ static bool visit_cf_list(isel_context *ctx, return false; } -static void export_vs_varying(isel_context *ctx, int slot, bool is_pos, int *next_pos) +static void create_null_export(isel_context *ctx) +{ + /* Some shader stages always need to have exports. + * So when there is none, we need to add a null export. + */ + + unsigned dest = (ctx->program->stage & hw_fs) ? 9 /* NULL */ : V_008DFC_SQ_EXP_POS; + bool vm = (ctx->program->stage & hw_fs) || ctx->program->chip_class >= GFX10; + Builder bld(ctx->program, ctx->block); + bld.exp(aco_opcode::exp, Operand(v1), Operand(v1), Operand(v1), Operand(v1), + /* enabled_mask */ 0, dest, /* compr */ false, /* done */ true, vm); +} + +static bool export_vs_varying(isel_context *ctx, int slot, bool is_pos, int *next_pos) { assert(ctx->stage == vertex_vs || ctx->stage == tess_eval_vs || @@ -8992,9 +9005,9 @@ static void export_vs_varying(isel_context *ctx, int slot, bool is_pos, int *nex : ctx->program->info->vs.outinfo.vs_output_param_offset[slot]; uint64_t mask = ctx->outputs.mask[slot]; if (!is_pos && !mask) - return; + return false; if (!is_pos && offset == AC_EXP_PARAM_UNDEFINED) - return; + return false; aco_ptr exp{create_instruction(aco_opcode::exp, Format::EXP, 4, 0)}; exp->enabled_mask = mask; for (unsigned i = 0; i < 4; ++i) { @@ -9014,6 +9027,8 @@ static void export_vs_varying(isel_context *ctx, int slot, bool is_pos, int *nex else exp->dest = V_008DFC_SQ_EXP_PARAM + offset; ctx->block->instructions.emplace_back(std::move(exp)); + + return true; } static void export_vs_psiz_layer_viewport(isel_context *ctx, int *next_pos) @@ -9075,14 +9090,15 @@ static void create_vs_exports(isel_context *ctx) /* the order these position exports are created is important */ int next_pos = 0; - export_vs_varying(ctx, VARYING_SLOT_POS, true, &next_pos); + bool exported_pos = export_vs_varying(ctx, VARYING_SLOT_POS, true, &next_pos); if (outinfo->writes_pointsize || outinfo->writes_layer || outinfo->writes_viewport_index) { export_vs_psiz_layer_viewport(ctx, &next_pos); + exported_pos = true; } if (ctx->num_clip_distances + ctx->num_cull_distances > 0) - export_vs_varying(ctx, VARYING_SLOT_CLIP_DIST0, true, &next_pos); + exported_pos |= export_vs_varying(ctx, VARYING_SLOT_CLIP_DIST0, true, &next_pos); if (ctx->num_clip_distances + ctx->num_cull_distances > 4) - export_vs_varying(ctx, VARYING_SLOT_CLIP_DIST1, true, &next_pos); + exported_pos |= export_vs_varying(ctx, VARYING_SLOT_CLIP_DIST1, true, &next_pos); if (ctx->export_clip_dists) { if (ctx->num_clip_distances + ctx->num_cull_distances > 0) @@ -9098,9 +9114,12 @@ static void create_vs_exports(isel_context *ctx) export_vs_varying(ctx, i, false, NULL); } + + if (!exported_pos) + create_null_export(ctx); } -static void export_fs_mrt_z(isel_context *ctx) +static bool export_fs_mrt_z(isel_context *ctx) { Builder bld(ctx->program, ctx->block); unsigned enabled_channels = 0; @@ -9157,9 +9176,11 @@ static void export_fs_mrt_z(isel_context *ctx) bld.exp(aco_opcode::exp, values[0], values[1], values[2], values[3], enabled_channels, V_008DFC_SQ_EXP_MRTZ, compr); + + return true; } -static void export_fs_mrt_color(isel_context *ctx, int slot) +static bool export_fs_mrt_color(isel_context *ctx, int slot) { Builder bld(ctx->program, ctx->block); unsigned write_mask = ctx->outputs.mask[slot]; @@ -9276,7 +9297,7 @@ static void export_fs_mrt_color(isel_context *ctx, int slot) } if (target == V_008DFC_SQ_EXP_NULL) - return; + return false; if ((bool) compr_op) { for (int i = 0; i < 2; i++) { @@ -9300,22 +9321,26 @@ static void export_fs_mrt_color(isel_context *ctx, int slot) bld.exp(aco_opcode::exp, values[0], values[1], values[2], values[3], enabled_channels, target, (bool) compr_op); + return true; } static void create_fs_exports(isel_context *ctx) { + bool exported = false; + /* Export depth, stencil and sample mask. */ if (ctx->outputs.mask[FRAG_RESULT_DEPTH] || ctx->outputs.mask[FRAG_RESULT_STENCIL] || - ctx->outputs.mask[FRAG_RESULT_SAMPLE_MASK]) { - export_fs_mrt_z(ctx); - } + ctx->outputs.mask[FRAG_RESULT_SAMPLE_MASK]) + exported |= export_fs_mrt_z(ctx); /* Export all color render targets. */ - for (unsigned i = FRAG_RESULT_DATA0; i < FRAG_RESULT_DATA7 + 1; ++i) { + for (unsigned i = FRAG_RESULT_DATA0; i < FRAG_RESULT_DATA7 + 1; ++i) if (ctx->outputs.mask[i]) - export_fs_mrt_color(ctx, i); - } + exported |= export_fs_mrt_color(ctx, i); + + if (!exported) + create_null_export(ctx); } static void write_tcs_tess_factors(isel_context *ctx) diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp index f42484c502f..6f2f54f6992 100644 --- a/src/amd/compiler/aco_lower_to_hw_instr.cpp +++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp @@ -969,10 +969,30 @@ void lower_to_hw_instr(Program* program) } case aco_opcode::p_exit_early_if: { - /* don't bother with an early exit at the end of the program */ - if (block->instructions[j + 1]->opcode == aco_opcode::p_logical_end && - block->instructions[j + 2]->opcode == aco_opcode::s_endpgm) { - break; + /* don't bother with an early exit near the end of the program */ + if ((block->instructions.size() - 1 - j) <= 4 && + block->instructions.back()->opcode == aco_opcode::s_endpgm) { + unsigned null_exp_dest = (ctx.program->stage & hw_fs) ? 9 /* NULL */ : V_008DFC_SQ_EXP_POS; + bool ignore_early_exit = true; + + for (unsigned k = j + 1; k < block->instructions.size(); ++k) { + const aco_ptr &instr = block->instructions[k]; + if (instr->opcode == aco_opcode::s_endpgm || + instr->opcode == aco_opcode::p_logical_end) + continue; + else if (instr->opcode == aco_opcode::exp && + static_cast(instr.get())->dest == null_exp_dest) + continue; + else if (instr->opcode == aco_opcode::p_parallelcopy && + instr->definitions[0].isFixed() && + instr->definitions[0].physReg() == exec) + continue; + + ignore_early_exit = false; + } + + if (ignore_early_exit) + break; } if (!discard_block) {