radeonsi/nir: Use NIR barycentric intrinsics

This is simpler than radv, since the driver_location is already assigned
for us.

Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
This commit is contained in:
Connor Abbott 2019-05-14 13:29:52 +02:00
parent d1c65939e2
commit b3a226691d
4 changed files with 143 additions and 69 deletions

View File

@ -3341,6 +3341,12 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
case nir_intrinsic_load_helper_invocation:
result = ac_build_load_helper_invocation(&ctx->ac);
break;
case nir_intrinsic_load_color0:
result = ctx->abi->color0;
break;
case nir_intrinsic_load_color1:
result = ctx->abi->color1;
break;
case nir_intrinsic_load_instance_id:
result = ctx->abi->instance_id;
break;

View File

@ -63,6 +63,8 @@ struct ac_shader_abi {
LLVMValueRef ancillary;
LLVMValueRef sample_coverage;
LLVMValueRef prim_mask;
LLVMValueRef color0;
LLVMValueRef color1;
/* CS */
LLVMValueRef local_invocation_ids;
LLVMValueRef num_work_groups;

View File

@ -510,6 +510,7 @@ static const struct nir_shader_compiler_options nir_options = {
.lower_rotate = true,
.optimize_sample_mask_in = true,
.max_unroll_iterations = 32,
.use_interpolated_input_intrinsics = true,
};
static const void *

View File

@ -31,6 +31,7 @@
#include "compiler/nir/nir.h"
#include "compiler/nir_types.h"
#include "compiler/nir/nir_builder.h"
static nir_variable* tex_get_texture_var(nir_tex_instr *instr)
{
@ -461,51 +462,43 @@ void si_nir_scan_shader(const struct nir_shader *nir,
if (semantic_name == TGSI_SEMANTIC_PRIMID)
info->uses_primid = true;
if (variable->data.sample)
info->input_interpolate_loc[i] = TGSI_INTERPOLATE_LOC_SAMPLE;
else if (variable->data.centroid)
info->input_interpolate_loc[i] = TGSI_INTERPOLATE_LOC_CENTROID;
else
info->input_interpolate_loc[i] = TGSI_INTERPOLATE_LOC_CENTER;
enum glsl_base_type base_type =
glsl_get_base_type(glsl_without_array(variable->type));
enum glsl_base_type base_type =
glsl_get_base_type(glsl_without_array(variable->type));
switch (variable->data.interpolation) {
case INTERP_MODE_NONE:
if (glsl_base_type_is_integer(base_type)) {
info->input_interpolate[i] = TGSI_INTERPOLATE_CONSTANT;
break;
}
switch (variable->data.interpolation) {
case INTERP_MODE_NONE:
if (glsl_base_type_is_integer(base_type)) {
info->input_interpolate[i] = TGSI_INTERPOLATE_CONSTANT;
break;
}
if (semantic_name == TGSI_SEMANTIC_COLOR) {
info->input_interpolate[i] = TGSI_INTERPOLATE_COLOR;
break;
}
/* fall-through */
if (semantic_name == TGSI_SEMANTIC_COLOR) {
info->input_interpolate[i] = TGSI_INTERPOLATE_COLOR;
break;
}
/* fall-through */
case INTERP_MODE_SMOOTH:
assert(!glsl_base_type_is_integer(base_type));
case INTERP_MODE_SMOOTH:
assert(!glsl_base_type_is_integer(base_type));
info->input_interpolate[i] = TGSI_INTERPOLATE_PERSPECTIVE;
break;
info->input_interpolate[i] = TGSI_INTERPOLATE_PERSPECTIVE;
break;
case INTERP_MODE_NOPERSPECTIVE:
assert(!glsl_base_type_is_integer(base_type));
case INTERP_MODE_NOPERSPECTIVE:
assert(!glsl_base_type_is_integer(base_type));
info->input_interpolate[i] = TGSI_INTERPOLATE_LINEAR;
break;
info->input_interpolate[i] = TGSI_INTERPOLATE_LINEAR;
break;
case INTERP_MODE_FLAT:
info->input_interpolate[i] = TGSI_INTERPOLATE_CONSTANT;
break;
}
case INTERP_MODE_FLAT:
info->input_interpolate[i] = TGSI_INTERPOLATE_CONSTANT;
break;
}
}
}
info->num_inputs = num_inputs;
i = 0;
uint64_t processed_outputs = 0;
unsigned num_outputs = 0;
@ -856,6 +849,53 @@ si_nir_opts(struct nir_shader *nir)
} while (progress);
}
static int
type_size_vec4(const struct glsl_type *type, bool bindless)
{
return glsl_count_attribute_slots(type, false);
}
static void
si_nir_lower_color(nir_shader *nir)
{
nir_function_impl *entrypoint = nir_shader_get_entrypoint(nir);
nir_builder b;
nir_builder_init(&b, entrypoint);
nir_foreach_block(block, entrypoint) {
nir_foreach_instr_safe(instr, block) {
if (instr->type != nir_instr_type_intrinsic)
continue;
nir_intrinsic_instr *intrin =
nir_instr_as_intrinsic(instr);
if (intrin->intrinsic != nir_intrinsic_load_deref)
continue;
nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
if (deref->mode != nir_var_shader_in)
continue;
b.cursor = nir_before_instr(instr);
nir_variable *var = nir_deref_instr_get_variable(deref);
nir_ssa_def *def;
if (var->data.location == VARYING_SLOT_COL0) {
def = nir_load_color0(&b);
} else if (var->data.location == VARYING_SLOT_COL1) {
def = nir_load_color1(&b);
} else {
continue;
}
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(def));
nir_instr_remove(instr);
}
}
}
/**
* Perform "lowering" operations on the NIR that are run once when the shader
* selector is created.
@ -867,8 +907,29 @@ si_lower_nir(struct si_shader_selector* sel)
* interprets them as slots, while the ac/nir backend interprets them
* as individual components.
*/
nir_foreach_variable(variable, &sel->nir->inputs)
variable->data.driver_location *= 4;
if (sel->nir->info.stage != MESA_SHADER_FRAGMENT) {
nir_foreach_variable(variable, &sel->nir->inputs)
variable->data.driver_location *= 4;
} else {
NIR_PASS_V(sel->nir, nir_lower_io_to_temporaries,
nir_shader_get_entrypoint(sel->nir), false, true);
/* Since we're doing nir_lower_io_to_temporaries late, we need
* to lower all the copy_deref's introduced by
* lower_io_to_temporaries before calling nir_lower_io.
*/
NIR_PASS_V(sel->nir, nir_split_var_copies);
NIR_PASS_V(sel->nir, nir_lower_var_copies);
NIR_PASS_V(sel->nir, nir_lower_global_vars_to_local);
si_nir_lower_color(sel->nir);
NIR_PASS_V(sel->nir, nir_lower_io, nir_var_shader_in, type_size_vec4, 0);
/* This pass needs actual constants */
NIR_PASS_V(sel->nir, nir_opt_constant_folding);
NIR_PASS_V(sel->nir, nir_io_add_const_offset_to_base,
nir_var_shader_in);
}
nir_foreach_variable(variable, &sel->nir->outputs) {
variable->data.driver_location *= 4;
@ -924,24 +985,6 @@ static void declare_nir_input_vs(struct si_shader_context *ctx,
si_llvm_load_input_vs(ctx, input_index, out);
}
static void declare_nir_input_fs(struct si_shader_context *ctx,
struct nir_variable *variable,
unsigned input_index,
LLVMValueRef out[4])
{
unsigned slot = variable->data.location;
if (slot == VARYING_SLOT_POS) {
out[0] = LLVMGetParam(ctx->main_fn, SI_PARAM_POS_X_FLOAT);
out[1] = LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Y_FLOAT);
out[2] = LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Z_FLOAT);
out[3] = ac_build_fdiv(&ctx->ac, ctx->ac.f32_1,
LLVMGetParam(ctx->main_fn, SI_PARAM_POS_W_FLOAT));
return;
}
si_llvm_load_input_fs(ctx, input_index, out);
}
LLVMValueRef
si_nir_lookup_interp_param(struct ac_shader_abi *abi,
enum glsl_interp_mode interp, unsigned location)
@ -1067,20 +1110,16 @@ bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *nir)
{
struct tgsi_shader_info *info = &ctx->shader->selector->info;
if (nir->info.stage == MESA_SHADER_VERTEX ||
nir->info.stage == MESA_SHADER_FRAGMENT) {
if (nir->info.stage == MESA_SHADER_VERTEX) {
uint64_t processed_inputs = 0;
nir_foreach_variable(variable, &nir->inputs) {
unsigned attrib_count = glsl_count_attribute_slots(variable->type,
nir->info.stage == MESA_SHADER_VERTEX);
true);
unsigned input_idx = variable->data.driver_location;
LLVMValueRef data[4];
unsigned loc = variable->data.location;
if (loc >= VARYING_SLOT_VAR0 && nir->info.stage == MESA_SHADER_FRAGMENT)
ctx->abi.fs_input_attr_indices[loc - VARYING_SLOT_VAR0] = input_idx / 4;
for (unsigned i = 0; i < attrib_count; i++) {
/* Packed components share the same location so skip
* them if we have already processed the location.
@ -1090,24 +1129,50 @@ bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *nir)
continue;
}
if (nir->info.stage == MESA_SHADER_VERTEX) {
declare_nir_input_vs(ctx, variable, input_idx / 4, data);
bitcast_inputs(ctx, data, input_idx);
if (glsl_type_is_dual_slot(variable->type)) {
input_idx += 4;
declare_nir_input_vs(ctx, variable, input_idx / 4, data);
bitcast_inputs(ctx, data, input_idx);
if (glsl_type_is_dual_slot(variable->type)) {
input_idx += 4;
declare_nir_input_vs(ctx, variable, input_idx / 4, data);
bitcast_inputs(ctx, data, input_idx);
}
} else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
declare_nir_input_fs(ctx, variable, input_idx / 4, data);
bitcast_inputs(ctx, data, input_idx);
}
processed_inputs |= ((uint64_t)1 << (loc + i));
input_idx += 4;
}
}
}
} else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
unsigned colors_read =
ctx->shader->selector->info.colors_read;
LLVMValueRef main_fn = ctx->main_fn;
LLVMValueRef undef = LLVMGetUndef(ctx->f32);
unsigned offset = SI_PARAM_POS_FIXED_PT + 1;
if (colors_read & 0x0f) {
unsigned mask = colors_read & 0x0f;
LLVMValueRef values[4];
values[0] = mask & 0x1 ? LLVMGetParam(main_fn, offset++) : undef;
values[1] = mask & 0x2 ? LLVMGetParam(main_fn, offset++) : undef;
values[2] = mask & 0x4 ? LLVMGetParam(main_fn, offset++) : undef;
values[3] = mask & 0x8 ? LLVMGetParam(main_fn, offset++) : undef;
ctx->abi.color0 =
ac_to_integer(&ctx->ac,
ac_build_gather_values(&ctx->ac, values, 4));
}
if (colors_read & 0xf0) {
unsigned mask = (colors_read & 0xf0) >> 4;
LLVMValueRef values[4];
values[0] = mask & 0x1 ? LLVMGetParam(main_fn, offset++) : undef;
values[1] = mask & 0x2 ? LLVMGetParam(main_fn, offset++) : undef;
values[2] = mask & 0x4 ? LLVMGetParam(main_fn, offset++) : undef;
values[3] = mask & 0x8 ? LLVMGetParam(main_fn, offset++) : undef;
ctx->abi.color1 =
ac_to_integer(&ctx->ac,
ac_build_gather_values(&ctx->ac, values, 4));
}
}
ctx->abi.inputs = &ctx->inputs[0];
ctx->abi.load_sampler_desc = si_nir_load_sampler_desc;