aco: implement GS copy shaders
v5: rebase on float_controls changes
v7: rebase after shader args MR and load/store vectorizer MR

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/2421>
This commit is contained in:
parent
de4ce66f5c
commit
f8f7712666
|
@ -25,6 +25,7 @@
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <array>
|
#include <array>
|
||||||
|
#include <stack>
|
||||||
#include <map>
|
#include <map>
|
||||||
|
|
||||||
#include "ac_shader_util.h"
|
#include "ac_shader_util.h"
|
||||||
|
@ -8534,7 +8535,7 @@ static void create_vs_exports(isel_context *ctx)
|
||||||
if (ctx->num_clip_distances + ctx->num_cull_distances > 4)
|
if (ctx->num_clip_distances + ctx->num_cull_distances > 4)
|
||||||
export_vs_varying(ctx, VARYING_SLOT_CLIP_DIST1, true, &next_pos);
|
export_vs_varying(ctx, VARYING_SLOT_CLIP_DIST1, true, &next_pos);
|
||||||
|
|
||||||
if (ctx->options->key.vs_common_out.export_clip_dists) {
|
if (ctx->export_clip_dists) {
|
||||||
if (ctx->num_clip_distances + ctx->num_cull_distances > 0)
|
if (ctx->num_clip_distances + ctx->num_cull_distances > 0)
|
||||||
export_vs_varying(ctx, VARYING_SLOT_CLIP_DIST0, false, &next_pos);
|
export_vs_varying(ctx, VARYING_SLOT_CLIP_DIST0, false, &next_pos);
|
||||||
if (ctx->num_clip_distances + ctx->num_cull_distances > 4)
|
if (ctx->num_clip_distances + ctx->num_cull_distances > 4)
|
||||||
|
@ -8568,7 +8569,7 @@ static void emit_stream_output(isel_context *ctx,
|
||||||
|
|
||||||
Temp out[4];
|
Temp out[4];
|
||||||
bool all_undef = true;
|
bool all_undef = true;
|
||||||
assert(ctx->stage == vertex_vs);
|
assert(ctx->stage == vertex_vs || ctx->stage == gs_copy_vs);
|
||||||
for (unsigned i = 0; i < num_comps; i++) {
|
for (unsigned i = 0; i < num_comps; i++) {
|
||||||
out[i] = ctx->vsgs_output.outputs[loc][start + i];
|
out[i] = ctx->vsgs_output.outputs[loc][start + i];
|
||||||
all_undef = all_undef && !out[i].id();
|
all_undef = all_undef && !out[i].id();
|
||||||
|
@ -8804,13 +8805,24 @@ void setup_fp_mode(isel_context *ctx, nir_shader *shader)
|
||||||
ctx->block->fp_mode = program->next_fp_mode;
|
ctx->block->fp_mode = program->next_fp_mode;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void cleanup_cfg(Program *program)
|
||||||
|
{
|
||||||
|
/* create linear_succs/logical_succs */
|
||||||
|
for (Block& BB : program->blocks) {
|
||||||
|
for (unsigned idx : BB.linear_preds)
|
||||||
|
program->blocks[idx].linear_succs.emplace_back(BB.index);
|
||||||
|
for (unsigned idx : BB.logical_preds)
|
||||||
|
program->blocks[idx].logical_succs.emplace_back(BB.index);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void select_program(Program *program,
|
void select_program(Program *program,
|
||||||
unsigned shader_count,
|
unsigned shader_count,
|
||||||
struct nir_shader *const *shaders,
|
struct nir_shader *const *shaders,
|
||||||
ac_shader_config* config,
|
ac_shader_config* config,
|
||||||
struct radv_shader_args *args)
|
struct radv_shader_args *args)
|
||||||
{
|
{
|
||||||
isel_context ctx = setup_isel_context(program, shader_count, shaders, config, args);
|
isel_context ctx = setup_isel_context(program, shader_count, shaders, config, args, false);
|
||||||
|
|
||||||
for (unsigned i = 0; i < shader_count; i++) {
|
for (unsigned i = 0; i < shader_count; i++) {
|
||||||
nir_shader *nir = shaders[i];
|
nir_shader *nir = shaders[i];
|
||||||
|
@ -8879,12 +8891,162 @@ void select_program(Program *program,
|
||||||
bld.smem(aco_opcode::s_dcache_wb, false);
|
bld.smem(aco_opcode::s_dcache_wb, false);
|
||||||
bld.sopp(aco_opcode::s_endpgm);
|
bld.sopp(aco_opcode::s_endpgm);
|
||||||
|
|
||||||
/* cleanup CFG */
|
cleanup_cfg(program);
|
||||||
for (Block& BB : program->blocks) {
|
}
|
||||||
for (unsigned idx : BB.linear_preds)
|
|
||||||
program->blocks[idx].linear_succs.emplace_back(BB.index);
|
void select_gs_copy_shader(Program *program, struct nir_shader *gs_shader,
|
||||||
for (unsigned idx : BB.logical_preds)
|
ac_shader_config* config,
|
||||||
program->blocks[idx].logical_succs.emplace_back(BB.index);
|
struct radv_shader_args *args)
|
||||||
|
{
|
||||||
|
isel_context ctx = setup_isel_context(program, 1, &gs_shader, config, args, true);
|
||||||
|
|
||||||
|
program->next_fp_mode.preserve_signed_zero_inf_nan32 = false;
|
||||||
|
program->next_fp_mode.preserve_signed_zero_inf_nan16_64 = false;
|
||||||
|
program->next_fp_mode.must_flush_denorms32 = false;
|
||||||
|
program->next_fp_mode.must_flush_denorms16_64 = false;
|
||||||
|
program->next_fp_mode.care_about_round32 = false;
|
||||||
|
program->next_fp_mode.care_about_round16_64 = false;
|
||||||
|
program->next_fp_mode.denorm16_64 = fp_denorm_keep;
|
||||||
|
program->next_fp_mode.denorm32 = 0;
|
||||||
|
program->next_fp_mode.round32 = fp_round_ne;
|
||||||
|
program->next_fp_mode.round16_64 = fp_round_ne;
|
||||||
|
ctx.block->fp_mode = program->next_fp_mode;
|
||||||
|
|
||||||
|
add_startpgm(&ctx);
|
||||||
|
append_logical_start(ctx.block);
|
||||||
|
|
||||||
|
Builder bld(ctx.program, ctx.block);
|
||||||
|
|
||||||
|
Temp gsvs_ring = bld.smem(aco_opcode::s_load_dwordx4, bld.def(s4), program->private_segment_buffer, Operand(RING_GSVS_VS * 16u));
|
||||||
|
|
||||||
|
Operand stream_id(0u);
|
||||||
|
if (args->shader_info->so.num_outputs)
|
||||||
|
stream_id = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc),
|
||||||
|
get_arg(&ctx, ctx.args->streamout_config), Operand(0x20018u));
|
||||||
|
|
||||||
|
Temp vtx_offset = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand(2u), get_arg(&ctx, ctx.args->ac.vertex_id));
|
||||||
|
|
||||||
|
std::stack<Block> endif_blocks;
|
||||||
|
|
||||||
|
for (unsigned stream = 0; stream < 4; stream++) {
|
||||||
|
if (stream_id.isConstant() && stream != stream_id.constantValue())
|
||||||
|
continue;
|
||||||
|
|
||||||
|
unsigned num_components = args->shader_info->gs.num_stream_output_components[stream];
|
||||||
|
if (stream > 0 && (!num_components || !args->shader_info->so.num_outputs))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
memset(ctx.vsgs_output.mask, 0, sizeof(ctx.vsgs_output.mask));
|
||||||
|
|
||||||
|
unsigned BB_if_idx = ctx.block->index;
|
||||||
|
Block BB_endif = Block();
|
||||||
|
if (!stream_id.isConstant()) {
|
||||||
|
/* begin IF */
|
||||||
|
Temp cond = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), stream_id, Operand(stream));
|
||||||
|
append_logical_end(ctx.block);
|
||||||
|
ctx.block->kind |= block_kind_uniform;
|
||||||
|
bld.branch(aco_opcode::p_cbranch_z, cond);
|
||||||
|
|
||||||
|
BB_endif.kind |= ctx.block->kind & block_kind_top_level;
|
||||||
|
|
||||||
|
ctx.block = ctx.program->create_and_insert_block();
|
||||||
|
add_edge(BB_if_idx, ctx.block);
|
||||||
|
bld.reset(ctx.block);
|
||||||
|
append_logical_start(ctx.block);
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned offset = 0;
|
||||||
|
for (unsigned i = 0; i <= VARYING_SLOT_VAR31; ++i) {
|
||||||
|
if (args->shader_info->gs.output_streams[i] != stream)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
unsigned output_usage_mask = args->shader_info->gs.output_usage_mask[i];
|
||||||
|
unsigned length = util_last_bit(output_usage_mask);
|
||||||
|
for (unsigned j = 0; j < length; ++j) {
|
||||||
|
if (!(output_usage_mask & (1 << j)))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
unsigned const_offset = offset * args->shader_info->gs.vertices_out * 16 * 4;
|
||||||
|
Temp voffset = vtx_offset;
|
||||||
|
if (const_offset >= 4096u) {
|
||||||
|
voffset = bld.vadd32(bld.def(v1), Operand(const_offset / 4096u * 4096u), voffset);
|
||||||
|
const_offset %= 4096u;
|
||||||
|
}
|
||||||
|
|
||||||
|
aco_ptr<MUBUF_instruction> mubuf{create_instruction<MUBUF_instruction>(aco_opcode::buffer_load_dword, Format::MUBUF, 3, 1)};
|
||||||
|
mubuf->definitions[0] = bld.def(v1);
|
||||||
|
mubuf->operands[0] = Operand(voffset);
|
||||||
|
mubuf->operands[1] = Operand(gsvs_ring);
|
||||||
|
mubuf->operands[2] = Operand(0u);
|
||||||
|
mubuf->offen = true;
|
||||||
|
mubuf->offset = const_offset;
|
||||||
|
mubuf->glc = true;
|
||||||
|
mubuf->slc = true;
|
||||||
|
mubuf->dlc = args->options->chip_class >= GFX10;
|
||||||
|
mubuf->barrier = barrier_none;
|
||||||
|
mubuf->can_reorder = true;
|
||||||
|
|
||||||
|
ctx.vsgs_output.mask[i] |= 1 << j;
|
||||||
|
ctx.vsgs_output.outputs[i][j] = mubuf->definitions[0].getTemp();
|
||||||
|
|
||||||
|
bld.insert(std::move(mubuf));
|
||||||
|
|
||||||
|
offset++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (args->shader_info->so.num_outputs) {
|
||||||
|
emit_streamout(&ctx, stream);
|
||||||
|
bld.reset(ctx.block);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (stream == 0) {
|
||||||
|
create_vs_exports(&ctx);
|
||||||
|
ctx.block->kind |= block_kind_export_end;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!stream_id.isConstant()) {
|
||||||
|
append_logical_end(ctx.block);
|
||||||
|
|
||||||
|
/* branch from then block to endif block */
|
||||||
|
bld.branch(aco_opcode::p_branch);
|
||||||
|
add_edge(ctx.block->index, &BB_endif);
|
||||||
|
ctx.block->kind |= block_kind_uniform;
|
||||||
|
|
||||||
|
/* emit else block */
|
||||||
|
ctx.block = ctx.program->create_and_insert_block();
|
||||||
|
add_edge(BB_if_idx, ctx.block);
|
||||||
|
bld.reset(ctx.block);
|
||||||
|
append_logical_start(ctx.block);
|
||||||
|
|
||||||
|
endif_blocks.push(std::move(BB_endif));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
while (!endif_blocks.empty()) {
|
||||||
|
Block BB_endif = std::move(endif_blocks.top());
|
||||||
|
endif_blocks.pop();
|
||||||
|
|
||||||
|
Block *BB_else = ctx.block;
|
||||||
|
|
||||||
|
append_logical_end(BB_else);
|
||||||
|
/* branch from else block to endif block */
|
||||||
|
bld.branch(aco_opcode::p_branch);
|
||||||
|
add_edge(BB_else->index, &BB_endif);
|
||||||
|
BB_else->kind |= block_kind_uniform;
|
||||||
|
|
||||||
|
/** emit endif merge block */
|
||||||
|
ctx.block = program->insert_block(std::move(BB_endif));
|
||||||
|
bld.reset(ctx.block);
|
||||||
|
append_logical_start(ctx.block);
|
||||||
|
}
|
||||||
|
|
||||||
|
program->config->float_mode = program->blocks[0].fp_mode.val;
|
||||||
|
|
||||||
|
append_logical_end(ctx.block);
|
||||||
|
ctx.block->kind |= block_kind_uniform;
|
||||||
|
bld.sopp(aco_opcode::s_endpgm);
|
||||||
|
|
||||||
|
cleanup_cfg(program);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -85,6 +85,7 @@ struct isel_context {
|
||||||
uint64_t output_masks[MESA_SHADER_COMPUTE];
|
uint64_t output_masks[MESA_SHADER_COMPUTE];
|
||||||
|
|
||||||
/* VS output information */
|
/* VS output information */
|
||||||
|
bool export_clip_dists;
|
||||||
unsigned num_clip_distances;
|
unsigned num_clip_distances;
|
||||||
unsigned num_cull_distances;
|
unsigned num_cull_distances;
|
||||||
|
|
||||||
|
@ -661,6 +662,54 @@ mem_vectorize_callback(unsigned align, unsigned bit_size,
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
setup_vs_output_info(isel_context *ctx, nir_shader *nir,
|
||||||
|
bool export_prim_id, bool export_clip_dists,
|
||||||
|
radv_vs_output_info *outinfo)
|
||||||
|
{
|
||||||
|
memset(outinfo->vs_output_param_offset, AC_EXP_PARAM_UNDEFINED,
|
||||||
|
sizeof(outinfo->vs_output_param_offset));
|
||||||
|
|
||||||
|
outinfo->param_exports = 0;
|
||||||
|
int pos_written = 0x1;
|
||||||
|
if (outinfo->writes_pointsize || outinfo->writes_viewport_index || outinfo->writes_layer)
|
||||||
|
pos_written |= 1 << 1;
|
||||||
|
|
||||||
|
uint64_t mask = ctx->output_masks[nir->info.stage];
|
||||||
|
while (mask) {
|
||||||
|
int idx = u_bit_scan64(&mask);
|
||||||
|
if (idx >= VARYING_SLOT_VAR0 || idx == VARYING_SLOT_LAYER || idx == VARYING_SLOT_PRIMITIVE_ID ||
|
||||||
|
((idx == VARYING_SLOT_CLIP_DIST0 || idx == VARYING_SLOT_CLIP_DIST1) && export_clip_dists)) {
|
||||||
|
if (outinfo->vs_output_param_offset[idx] == AC_EXP_PARAM_UNDEFINED)
|
||||||
|
outinfo->vs_output_param_offset[idx] = outinfo->param_exports++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (outinfo->writes_layer &&
|
||||||
|
outinfo->vs_output_param_offset[VARYING_SLOT_LAYER] == AC_EXP_PARAM_UNDEFINED) {
|
||||||
|
/* when ctx->options->key.has_multiview_view_index = true, the layer
|
||||||
|
* variable isn't declared in NIR and it's isel's job to get the layer */
|
||||||
|
outinfo->vs_output_param_offset[VARYING_SLOT_LAYER] = outinfo->param_exports++;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (export_prim_id) {
|
||||||
|
assert(outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] == AC_EXP_PARAM_UNDEFINED);
|
||||||
|
outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] = outinfo->param_exports++;
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx->export_clip_dists = export_clip_dists;
|
||||||
|
ctx->num_clip_distances = util_bitcount(outinfo->clip_dist_mask);
|
||||||
|
ctx->num_cull_distances = util_bitcount(outinfo->cull_dist_mask);
|
||||||
|
|
||||||
|
assert(ctx->num_clip_distances + ctx->num_cull_distances <= 8);
|
||||||
|
|
||||||
|
if (ctx->num_clip_distances + ctx->num_cull_distances > 0)
|
||||||
|
pos_written |= 1 << 2;
|
||||||
|
if (ctx->num_clip_distances + ctx->num_cull_distances > 4)
|
||||||
|
pos_written |= 1 << 3;
|
||||||
|
|
||||||
|
outinfo->pos_exports = util_bitcount(pos_written);
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
setup_vs_variables(isel_context *ctx, nir_shader *nir)
|
setup_vs_variables(isel_context *ctx, nir_shader *nir)
|
||||||
{
|
{
|
||||||
|
@ -681,49 +730,8 @@ setup_vs_variables(isel_context *ctx, nir_shader *nir)
|
||||||
|
|
||||||
if (ctx->stage == vertex_vs) {
|
if (ctx->stage == vertex_vs) {
|
||||||
radv_vs_output_info *outinfo = &ctx->program->info->vs.outinfo;
|
radv_vs_output_info *outinfo = &ctx->program->info->vs.outinfo;
|
||||||
|
setup_vs_output_info(ctx, nir, outinfo->export_prim_id,
|
||||||
memset(outinfo->vs_output_param_offset, AC_EXP_PARAM_UNDEFINED,
|
ctx->options->key.vs_common_out.export_clip_dists, outinfo);
|
||||||
sizeof(outinfo->vs_output_param_offset));
|
|
||||||
|
|
||||||
bool export_clip_dists = ctx->options->key.vs_common_out.export_clip_dists;
|
|
||||||
|
|
||||||
outinfo->param_exports = 0;
|
|
||||||
int pos_written = 0x1;
|
|
||||||
if (outinfo->writes_pointsize || outinfo->writes_viewport_index || outinfo->writes_layer)
|
|
||||||
pos_written |= 1 << 1;
|
|
||||||
|
|
||||||
uint64_t mask = ctx->output_masks[nir->info.stage];
|
|
||||||
while (mask) {
|
|
||||||
int idx = u_bit_scan64(&mask);
|
|
||||||
if (idx >= VARYING_SLOT_VAR0 || idx == VARYING_SLOT_LAYER || idx == VARYING_SLOT_PRIMITIVE_ID ||
|
|
||||||
((idx == VARYING_SLOT_CLIP_DIST0 || idx == VARYING_SLOT_CLIP_DIST1) && export_clip_dists)) {
|
|
||||||
if (outinfo->vs_output_param_offset[idx] == AC_EXP_PARAM_UNDEFINED)
|
|
||||||
outinfo->vs_output_param_offset[idx] = outinfo->param_exports++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (outinfo->writes_layer &&
|
|
||||||
outinfo->vs_output_param_offset[VARYING_SLOT_LAYER] == AC_EXP_PARAM_UNDEFINED) {
|
|
||||||
/* when ctx->options->key.has_multiview_view_index = true, the layer
|
|
||||||
* variable isn't declared in NIR and it's isel's job to get the layer */
|
|
||||||
outinfo->vs_output_param_offset[VARYING_SLOT_LAYER] = outinfo->param_exports++;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (outinfo->export_prim_id) {
|
|
||||||
assert(outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] == AC_EXP_PARAM_UNDEFINED);
|
|
||||||
outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] = outinfo->param_exports++;
|
|
||||||
}
|
|
||||||
|
|
||||||
ctx->num_clip_distances = util_bitcount(outinfo->clip_dist_mask);
|
|
||||||
ctx->num_cull_distances = util_bitcount(outinfo->cull_dist_mask);
|
|
||||||
|
|
||||||
assert(ctx->num_clip_distances + ctx->num_cull_distances <= 8);
|
|
||||||
|
|
||||||
if (ctx->num_clip_distances + ctx->num_cull_distances > 0)
|
|
||||||
pos_written |= 1 << 2;
|
|
||||||
if (ctx->num_clip_distances + ctx->num_cull_distances > 4)
|
|
||||||
pos_written |= 1 << 3;
|
|
||||||
|
|
||||||
outinfo->pos_exports = util_bitcount(pos_written);
|
|
||||||
} else if (ctx->stage == vertex_geometry_gs || ctx->stage == vertex_es) {
|
} else if (ctx->stage == vertex_geometry_gs || ctx->stage == vertex_es) {
|
||||||
/* TODO: radv_nir_shader_info_pass() already sets this but it's larger
|
/* TODO: radv_nir_shader_info_pass() already sets this but it's larger
|
||||||
* than it needs to be in order to set it better, we have to improve
|
* than it needs to be in order to set it better, we have to improve
|
||||||
|
@ -824,12 +832,80 @@ get_io_masks(isel_context *ctx, unsigned shader_count, struct nir_shader *const
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
setup_nir(isel_context *ctx, nir_shader *nir)
|
||||||
|
{
|
||||||
|
Program *program = ctx->program;
|
||||||
|
|
||||||
|
/* align and copy constant data */
|
||||||
|
while (program->constant_data.size() % 4u)
|
||||||
|
program->constant_data.push_back(0);
|
||||||
|
ctx->constant_data_offset = program->constant_data.size();
|
||||||
|
program->constant_data.insert(program->constant_data.end(),
|
||||||
|
(uint8_t*)nir->constant_data,
|
||||||
|
(uint8_t*)nir->constant_data + nir->constant_data_size);
|
||||||
|
|
||||||
|
/* the variable setup has to be done before lower_io / CSE */
|
||||||
|
setup_variables(ctx, nir);
|
||||||
|
|
||||||
|
/* optimize and lower memory operations */
|
||||||
|
bool lower_to_scalar = false;
|
||||||
|
bool lower_pack = false;
|
||||||
|
if (nir_opt_load_store_vectorize(nir,
|
||||||
|
(nir_variable_mode)(nir_var_mem_ssbo | nir_var_mem_ubo |
|
||||||
|
nir_var_mem_push_const | nir_var_mem_shared),
|
||||||
|
mem_vectorize_callback)) {
|
||||||
|
lower_to_scalar = true;
|
||||||
|
lower_pack = true;
|
||||||
|
}
|
||||||
|
if (nir->info.stage != MESA_SHADER_COMPUTE)
|
||||||
|
nir_lower_io(nir, (nir_variable_mode)(nir_var_shader_in | nir_var_shader_out), type_size, (nir_lower_io_options)0);
|
||||||
|
nir_lower_explicit_io(nir, nir_var_mem_global, nir_address_format_64bit_global);
|
||||||
|
|
||||||
|
if (lower_to_scalar)
|
||||||
|
nir_lower_alu_to_scalar(nir, NULL, NULL);
|
||||||
|
if (lower_pack)
|
||||||
|
nir_lower_pack(nir);
|
||||||
|
|
||||||
|
/* lower ALU operations */
|
||||||
|
// TODO: implement logic64 in aco, it's more effective for sgprs
|
||||||
|
nir_lower_int64(nir, nir->options->lower_int64_options);
|
||||||
|
|
||||||
|
nir_opt_idiv_const(nir, 32);
|
||||||
|
nir_lower_idiv(nir, nir_lower_idiv_precise);
|
||||||
|
|
||||||
|
/* optimize the lowered ALU operations */
|
||||||
|
bool more_algebraic = true;
|
||||||
|
while (more_algebraic) {
|
||||||
|
more_algebraic = false;
|
||||||
|
NIR_PASS_V(nir, nir_copy_prop);
|
||||||
|
NIR_PASS_V(nir, nir_opt_dce);
|
||||||
|
NIR_PASS_V(nir, nir_opt_constant_folding);
|
||||||
|
NIR_PASS(more_algebraic, nir, nir_opt_algebraic);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* cleanup passes */
|
||||||
|
nir_lower_load_const_to_scalar(nir);
|
||||||
|
nir_opt_shrink_load(nir);
|
||||||
|
nir_move_options move_opts = (nir_move_options)(
|
||||||
|
nir_move_const_undef | nir_move_load_ubo | nir_move_load_input | nir_move_comparisons);
|
||||||
|
nir_opt_sink(nir, move_opts);
|
||||||
|
nir_opt_move(nir, move_opts);
|
||||||
|
nir_convert_to_lcssa(nir, true, false);
|
||||||
|
nir_lower_phis_to_scalar(nir);
|
||||||
|
|
||||||
|
nir_function_impl *func = nir_shader_get_entrypoint(nir);
|
||||||
|
nir_index_ssa_defs(func);
|
||||||
|
nir_metadata_require(func, nir_metadata_block_index);
|
||||||
|
}
|
||||||
|
|
||||||
isel_context
|
isel_context
|
||||||
setup_isel_context(Program* program,
|
setup_isel_context(Program* program,
|
||||||
unsigned shader_count,
|
unsigned shader_count,
|
||||||
struct nir_shader *const *shaders,
|
struct nir_shader *const *shaders,
|
||||||
ac_shader_config* config,
|
ac_shader_config* config,
|
||||||
struct radv_shader_args *args)
|
struct radv_shader_args *args,
|
||||||
|
bool is_gs_copy_shader)
|
||||||
{
|
{
|
||||||
program->stage = 0;
|
program->stage = 0;
|
||||||
for (unsigned i = 0; i < shader_count; i++) {
|
for (unsigned i = 0; i < shader_count; i++) {
|
||||||
|
@ -844,7 +920,7 @@ setup_isel_context(Program* program,
|
||||||
program->stage |= sw_tes;
|
program->stage |= sw_tes;
|
||||||
break;
|
break;
|
||||||
case MESA_SHADER_GEOMETRY:
|
case MESA_SHADER_GEOMETRY:
|
||||||
program->stage |= sw_gs;
|
program->stage |= is_gs_copy_shader ? sw_gs_copy : sw_gs;
|
||||||
break;
|
break;
|
||||||
case MESA_SHADER_FRAGMENT:
|
case MESA_SHADER_FRAGMENT:
|
||||||
program->stage |= sw_fs;
|
program->stage |= sw_fs;
|
||||||
|
@ -868,6 +944,8 @@ setup_isel_context(Program* program,
|
||||||
program->stage |= hw_fs;
|
program->stage |= hw_fs;
|
||||||
else if (program->stage == sw_cs)
|
else if (program->stage == sw_cs)
|
||||||
program->stage |= hw_cs;
|
program->stage |= hw_cs;
|
||||||
|
else if (program->stage == sw_gs_copy)
|
||||||
|
program->stage |= hw_vs;
|
||||||
else if (program->stage == (sw_vs | sw_gs) && gfx9_plus && !ngg)
|
else if (program->stage == (sw_vs | sw_gs) && gfx9_plus && !ngg)
|
||||||
program->stage |= hw_gs;
|
program->stage |= hw_gs;
|
||||||
else
|
else
|
||||||
|
@ -918,94 +996,25 @@ setup_isel_context(Program* program,
|
||||||
|
|
||||||
get_io_masks(&ctx, shader_count, shaders);
|
get_io_masks(&ctx, shader_count, shaders);
|
||||||
|
|
||||||
for (unsigned i = 0; i < shader_count; i++) {
|
unsigned scratch_size = 0;
|
||||||
nir_shader *nir = shaders[i];
|
if (program->stage == gs_copy_vs) {
|
||||||
|
assert(shader_count == 1);
|
||||||
|
setup_vs_output_info(&ctx, shaders[0], false, true, &args->shader_info->vs.outinfo);
|
||||||
|
} else {
|
||||||
|
for (unsigned i = 0; i < shader_count; i++) {
|
||||||
|
nir_shader *nir = shaders[i];
|
||||||
|
setup_nir(&ctx, nir);
|
||||||
|
|
||||||
/* align and copy constant data */
|
if (args->options->dump_preoptir) {
|
||||||
while (program->constant_data.size() % 4u)
|
fprintf(stderr, "NIR shader before instruction selection:\n");
|
||||||
program->constant_data.push_back(0);
|
nir_print_shader(nir, stderr);
|
||||||
ctx.constant_data_offset = program->constant_data.size();
|
}
|
||||||
program->constant_data.insert(program->constant_data.end(),
|
|
||||||
(uint8_t*)nir->constant_data,
|
|
||||||
(uint8_t*)nir->constant_data + nir->constant_data_size);
|
|
||||||
|
|
||||||
/* the variable setup has to be done before lower_io / CSE */
|
|
||||||
setup_variables(&ctx, nir);
|
|
||||||
|
|
||||||
/* optimize and lower memory operations */
|
|
||||||
bool lower_to_scalar = false;
|
|
||||||
bool lower_pack = false;
|
|
||||||
if (nir_opt_load_store_vectorize(nir,
|
|
||||||
(nir_variable_mode)(nir_var_mem_ssbo | nir_var_mem_ubo |
|
|
||||||
nir_var_mem_push_const | nir_var_mem_shared),
|
|
||||||
mem_vectorize_callback)) {
|
|
||||||
lower_to_scalar = true;
|
|
||||||
lower_pack = true;
|
|
||||||
}
|
|
||||||
if (nir->info.stage != MESA_SHADER_COMPUTE)
|
|
||||||
nir_lower_io(nir, (nir_variable_mode)(nir_var_shader_in | nir_var_shader_out), type_size, (nir_lower_io_options)0);
|
|
||||||
nir_lower_explicit_io(nir, nir_var_mem_global, nir_address_format_64bit_global);
|
|
||||||
|
|
||||||
if (lower_to_scalar)
|
|
||||||
nir_lower_alu_to_scalar(nir, NULL, NULL);
|
|
||||||
if (lower_pack)
|
|
||||||
nir_lower_pack(nir);
|
|
||||||
|
|
||||||
/* lower ALU operations */
|
|
||||||
// TODO: implement logic64 in aco, it's more effective for sgprs
|
|
||||||
nir_lower_int64(nir, nir->options->lower_int64_options);
|
|
||||||
|
|
||||||
nir_opt_idiv_const(nir, 32);
|
|
||||||
nir_lower_idiv(nir, nir_lower_idiv_precise);
|
|
||||||
|
|
||||||
/* optimize the lowered ALU operations */
|
|
||||||
bool more_algebraic = true;
|
|
||||||
while (more_algebraic) {
|
|
||||||
more_algebraic = false;
|
|
||||||
NIR_PASS_V(nir, nir_copy_prop);
|
|
||||||
NIR_PASS_V(nir, nir_opt_dce);
|
|
||||||
NIR_PASS_V(nir, nir_opt_constant_folding);
|
|
||||||
NIR_PASS(more_algebraic, nir, nir_opt_algebraic);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Do late algebraic optimization to turn add(a, neg(b)) back into
|
for (unsigned i = 0; i < shader_count; i++)
|
||||||
* subs, then the mandatory cleanup after algebraic. Note that it may
|
scratch_size = std::max(scratch_size, shaders[i]->scratch_size);
|
||||||
* produce fnegs, and if so then we need to keep running to squash
|
|
||||||
* fneg(fneg(a)).
|
|
||||||
*/
|
|
||||||
bool more_late_algebraic = true;
|
|
||||||
while (more_late_algebraic) {
|
|
||||||
more_late_algebraic = false;
|
|
||||||
NIR_PASS(more_late_algebraic, nir, nir_opt_algebraic_late);
|
|
||||||
NIR_PASS_V(nir, nir_opt_constant_folding);
|
|
||||||
NIR_PASS_V(nir, nir_copy_prop);
|
|
||||||
NIR_PASS_V(nir, nir_opt_dce);
|
|
||||||
NIR_PASS_V(nir, nir_opt_cse);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* cleanup passes */
|
|
||||||
nir_lower_load_const_to_scalar(nir);
|
|
||||||
nir_opt_shrink_load(nir);
|
|
||||||
nir_move_options move_opts = (nir_move_options)(
|
|
||||||
nir_move_const_undef | nir_move_load_ubo | nir_move_load_input | nir_move_comparisons);
|
|
||||||
nir_opt_sink(nir, move_opts);
|
|
||||||
nir_opt_move(nir, move_opts);
|
|
||||||
nir_convert_to_lcssa(nir, true, false);
|
|
||||||
nir_lower_phis_to_scalar(nir);
|
|
||||||
|
|
||||||
nir_function_impl *func = nir_shader_get_entrypoint(nir);
|
|
||||||
nir_index_ssa_defs(func);
|
|
||||||
nir_metadata_require(func, nir_metadata_block_index);
|
|
||||||
|
|
||||||
if (args->options->dump_preoptir) {
|
|
||||||
fprintf(stderr, "NIR shader before instruction selection:\n");
|
|
||||||
nir_print_shader(nir, stderr);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned scratch_size = 0;
|
|
||||||
for (unsigned i = 0; i < shader_count; i++)
|
|
||||||
scratch_size = std::max(scratch_size, shaders[i]->scratch_size);
|
|
||||||
ctx.program->config->scratch_bytes_per_wave = align(scratch_size * ctx.program->wave_size, 1024);
|
ctx.program->config->scratch_bytes_per_wave = align(scratch_size * ctx.program->wave_size, 1024);
|
||||||
|
|
||||||
ctx.block = ctx.program->create_and_insert_block();
|
ctx.block = ctx.program->create_and_insert_block();
|
||||||
|
|
|
@ -65,7 +65,10 @@ void aco_compile_shader(unsigned shader_count,
|
||||||
std::unique_ptr<aco::Program> program{new aco::Program};
|
std::unique_ptr<aco::Program> program{new aco::Program};
|
||||||
|
|
||||||
/* Instruction Selection */
|
/* Instruction Selection */
|
||||||
aco::select_program(program.get(), shader_count, shaders, &config, args);
|
if (args->is_gs_copy_shader)
|
||||||
|
aco::select_gs_copy_shader(program.get(), shaders[0], &config, args);
|
||||||
|
else
|
||||||
|
aco::select_program(program.get(), shader_count, shaders, &config, args);
|
||||||
if (args->options->dump_preoptir) {
|
if (args->options->dump_preoptir) {
|
||||||
std::cerr << "After Instruction Selection:\n";
|
std::cerr << "After Instruction Selection:\n";
|
||||||
aco_print_program(program.get(), stderr);
|
aco_print_program(program.get(), stderr);
|
||||||
|
@ -162,7 +165,7 @@ void aco_compile_shader(unsigned shader_count,
|
||||||
|
|
||||||
legacy_binary->base.type = RADV_BINARY_TYPE_LEGACY;
|
legacy_binary->base.type = RADV_BINARY_TYPE_LEGACY;
|
||||||
legacy_binary->base.stage = shaders[shader_count-1]->info.stage;
|
legacy_binary->base.stage = shaders[shader_count-1]->info.stage;
|
||||||
legacy_binary->base.is_gs_copy_shader = false;
|
legacy_binary->base.is_gs_copy_shader = args->is_gs_copy_shader;
|
||||||
legacy_binary->base.total_size = size;
|
legacy_binary->base.total_size = size;
|
||||||
|
|
||||||
memcpy(legacy_binary->data, code.data(), code.size() * sizeof(uint32_t));
|
memcpy(legacy_binary->data, code.data(), code.size() * sizeof(uint32_t));
|
||||||
|
|
|
@ -1106,23 +1106,25 @@ static constexpr Stage sw_tcs = 1 << 2;
|
||||||
static constexpr Stage sw_tes = 1 << 3;
|
static constexpr Stage sw_tes = 1 << 3;
|
||||||
static constexpr Stage sw_fs = 1 << 4;
|
static constexpr Stage sw_fs = 1 << 4;
|
||||||
static constexpr Stage sw_cs = 1 << 5;
|
static constexpr Stage sw_cs = 1 << 5;
|
||||||
static constexpr Stage sw_mask = 0x3f;
|
static constexpr Stage sw_gs_copy = 1 << 6;
|
||||||
|
static constexpr Stage sw_mask = 0x7f;
|
||||||
|
|
||||||
/* hardware stages (can't be OR'd, just a mask for convenience when testing multiple) */
|
/* hardware stages (can't be OR'd, just a mask for convenience when testing multiple) */
|
||||||
static constexpr Stage hw_vs = 1 << 6;
|
static constexpr Stage hw_vs = 1 << 7;
|
||||||
static constexpr Stage hw_es = 1 << 7; /* not on GFX9. combined into GS on GFX9 (and GFX10/legacy). */
|
static constexpr Stage hw_es = 1 << 8; /* not on GFX9. combined into GS on GFX9 (and GFX10/legacy). */
|
||||||
static constexpr Stage hw_gs = 1 << 8;
|
static constexpr Stage hw_gs = 1 << 9;
|
||||||
static constexpr Stage hw_ls = 1 << 9; /* not on GFX9. combined into HS on GFX9 (and GFX10/legacy). */
|
static constexpr Stage hw_ls = 1 << 10; /* not on GFX9. combined into HS on GFX9 (and GFX10/legacy). */
|
||||||
static constexpr Stage hw_hs = 1 << 10;
|
static constexpr Stage hw_hs = 1 << 11;
|
||||||
static constexpr Stage hw_fs = 1 << 11;
|
static constexpr Stage hw_fs = 1 << 12;
|
||||||
static constexpr Stage hw_cs = 1 << 12;
|
static constexpr Stage hw_cs = 1 << 13;
|
||||||
static constexpr Stage hw_mask = 0x7f << 6;
|
static constexpr Stage hw_mask = 0x7f << 7;
|
||||||
|
|
||||||
/* possible settings of Program::stage */
|
/* possible settings of Program::stage */
|
||||||
static constexpr Stage vertex_vs = sw_vs | hw_vs;
|
static constexpr Stage vertex_vs = sw_vs | hw_vs;
|
||||||
static constexpr Stage fragment_fs = sw_fs | hw_fs;
|
static constexpr Stage fragment_fs = sw_fs | hw_fs;
|
||||||
static constexpr Stage compute_cs = sw_cs | hw_cs;
|
static constexpr Stage compute_cs = sw_cs | hw_cs;
|
||||||
static constexpr Stage tess_eval_vs = sw_tes | hw_vs;
|
static constexpr Stage tess_eval_vs = sw_tes | hw_vs;
|
||||||
|
static constexpr Stage gs_copy_vs = sw_gs_copy | hw_vs;
|
||||||
/* GFX10/NGG */
|
/* GFX10/NGG */
|
||||||
static constexpr Stage ngg_vertex_gs = sw_vs | hw_gs;
|
static constexpr Stage ngg_vertex_gs = sw_vs | hw_gs;
|
||||||
static constexpr Stage ngg_vertex_geometry_gs = sw_vs | sw_gs | hw_gs;
|
static constexpr Stage ngg_vertex_geometry_gs = sw_vs | sw_gs | hw_gs;
|
||||||
|
@ -1219,6 +1221,9 @@ void select_program(Program *program,
|
||||||
struct nir_shader *const *shaders,
|
struct nir_shader *const *shaders,
|
||||||
ac_shader_config* config,
|
ac_shader_config* config,
|
||||||
struct radv_shader_args *args);
|
struct radv_shader_args *args);
|
||||||
|
void select_gs_copy_shader(Program *program, struct nir_shader *gs_shader,
|
||||||
|
ac_shader_config* config,
|
||||||
|
struct radv_shader_args *args);
|
||||||
|
|
||||||
void lower_wqm(Program* program, live& live_vars,
|
void lower_wqm(Program* program, live& live_vars,
|
||||||
const struct radv_nir_compiler_options *options);
|
const struct radv_nir_compiler_options *options);
|
||||||
|
|
Loading…
Reference in New Issue