radeonsi/nir: Use NIR barycentric intrinsics

This is simpler than radv, since the driver_location is already assigned
for us.

Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
This commit is contained in:
Connor Abbott 2019-05-14 13:29:52 +02:00
parent d1c65939e2
commit b3a226691d
4 changed files with 143 additions and 69 deletions

View File

@ -3341,6 +3341,12 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
case nir_intrinsic_load_helper_invocation: case nir_intrinsic_load_helper_invocation:
result = ac_build_load_helper_invocation(&ctx->ac); result = ac_build_load_helper_invocation(&ctx->ac);
break; break;
case nir_intrinsic_load_color0:
result = ctx->abi->color0;
break;
case nir_intrinsic_load_color1:
result = ctx->abi->color1;
break;
case nir_intrinsic_load_instance_id: case nir_intrinsic_load_instance_id:
result = ctx->abi->instance_id; result = ctx->abi->instance_id;
break; break;

View File

@ -63,6 +63,8 @@ struct ac_shader_abi {
LLVMValueRef ancillary; LLVMValueRef ancillary;
LLVMValueRef sample_coverage; LLVMValueRef sample_coverage;
LLVMValueRef prim_mask; LLVMValueRef prim_mask;
LLVMValueRef color0;
LLVMValueRef color1;
/* CS */ /* CS */
LLVMValueRef local_invocation_ids; LLVMValueRef local_invocation_ids;
LLVMValueRef num_work_groups; LLVMValueRef num_work_groups;

View File

@ -510,6 +510,7 @@ static const struct nir_shader_compiler_options nir_options = {
.lower_rotate = true, .lower_rotate = true,
.optimize_sample_mask_in = true, .optimize_sample_mask_in = true,
.max_unroll_iterations = 32, .max_unroll_iterations = 32,
.use_interpolated_input_intrinsics = true,
}; };
static const void * static const void *

View File

@ -31,6 +31,7 @@
#include "compiler/nir/nir.h" #include "compiler/nir/nir.h"
#include "compiler/nir_types.h" #include "compiler/nir_types.h"
#include "compiler/nir/nir_builder.h"
static nir_variable* tex_get_texture_var(nir_tex_instr *instr) static nir_variable* tex_get_texture_var(nir_tex_instr *instr)
{ {
@ -461,51 +462,43 @@ void si_nir_scan_shader(const struct nir_shader *nir,
if (semantic_name == TGSI_SEMANTIC_PRIMID) if (semantic_name == TGSI_SEMANTIC_PRIMID)
info->uses_primid = true; info->uses_primid = true;
if (variable->data.sample) enum glsl_base_type base_type =
info->input_interpolate_loc[i] = TGSI_INTERPOLATE_LOC_SAMPLE; glsl_get_base_type(glsl_without_array(variable->type));
else if (variable->data.centroid)
info->input_interpolate_loc[i] = TGSI_INTERPOLATE_LOC_CENTROID;
else
info->input_interpolate_loc[i] = TGSI_INTERPOLATE_LOC_CENTER;
enum glsl_base_type base_type = switch (variable->data.interpolation) {
glsl_get_base_type(glsl_without_array(variable->type)); case INTERP_MODE_NONE:
if (glsl_base_type_is_integer(base_type)) {
info->input_interpolate[i] = TGSI_INTERPOLATE_CONSTANT;
break;
}
switch (variable->data.interpolation) { if (semantic_name == TGSI_SEMANTIC_COLOR) {
case INTERP_MODE_NONE: info->input_interpolate[i] = TGSI_INTERPOLATE_COLOR;
if (glsl_base_type_is_integer(base_type)) { break;
info->input_interpolate[i] = TGSI_INTERPOLATE_CONSTANT; }
break; /* fall-through */
}
if (semantic_name == TGSI_SEMANTIC_COLOR) { case INTERP_MODE_SMOOTH:
info->input_interpolate[i] = TGSI_INTERPOLATE_COLOR; assert(!glsl_base_type_is_integer(base_type));
break;
}
/* fall-through */
case INTERP_MODE_SMOOTH: info->input_interpolate[i] = TGSI_INTERPOLATE_PERSPECTIVE;
assert(!glsl_base_type_is_integer(base_type)); break;
info->input_interpolate[i] = TGSI_INTERPOLATE_PERSPECTIVE; case INTERP_MODE_NOPERSPECTIVE:
break; assert(!glsl_base_type_is_integer(base_type));
case INTERP_MODE_NOPERSPECTIVE: info->input_interpolate[i] = TGSI_INTERPOLATE_LINEAR;
assert(!glsl_base_type_is_integer(base_type)); break;
info->input_interpolate[i] = TGSI_INTERPOLATE_LINEAR; case INTERP_MODE_FLAT:
break; info->input_interpolate[i] = TGSI_INTERPOLATE_CONSTANT;
break;
case INTERP_MODE_FLAT: }
info->input_interpolate[i] = TGSI_INTERPOLATE_CONSTANT;
break;
}
} }
} }
info->num_inputs = num_inputs; info->num_inputs = num_inputs;
i = 0; i = 0;
uint64_t processed_outputs = 0; uint64_t processed_outputs = 0;
unsigned num_outputs = 0; unsigned num_outputs = 0;
@ -856,6 +849,53 @@ si_nir_opts(struct nir_shader *nir)
} while (progress); } while (progress);
} }
static int
type_size_vec4(const struct glsl_type *type, bool bindless)
{
return glsl_count_attribute_slots(type, false);
}
static void
si_nir_lower_color(nir_shader *nir)
{
nir_function_impl *entrypoint = nir_shader_get_entrypoint(nir);
nir_builder b;
nir_builder_init(&b, entrypoint);
nir_foreach_block(block, entrypoint) {
nir_foreach_instr_safe(instr, block) {
if (instr->type != nir_instr_type_intrinsic)
continue;
nir_intrinsic_instr *intrin =
nir_instr_as_intrinsic(instr);
if (intrin->intrinsic != nir_intrinsic_load_deref)
continue;
nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
if (deref->mode != nir_var_shader_in)
continue;
b.cursor = nir_before_instr(instr);
nir_variable *var = nir_deref_instr_get_variable(deref);
nir_ssa_def *def;
if (var->data.location == VARYING_SLOT_COL0) {
def = nir_load_color0(&b);
} else if (var->data.location == VARYING_SLOT_COL1) {
def = nir_load_color1(&b);
} else {
continue;
}
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(def));
nir_instr_remove(instr);
}
}
}
/** /**
* Perform "lowering" operations on the NIR that are run once when the shader * Perform "lowering" operations on the NIR that are run once when the shader
* selector is created. * selector is created.
@ -867,8 +907,29 @@ si_lower_nir(struct si_shader_selector* sel)
* interprets them as slots, while the ac/nir backend interprets them * interprets them as slots, while the ac/nir backend interprets them
* as individual components. * as individual components.
*/ */
nir_foreach_variable(variable, &sel->nir->inputs) if (sel->nir->info.stage != MESA_SHADER_FRAGMENT) {
variable->data.driver_location *= 4; nir_foreach_variable(variable, &sel->nir->inputs)
variable->data.driver_location *= 4;
} else {
NIR_PASS_V(sel->nir, nir_lower_io_to_temporaries,
nir_shader_get_entrypoint(sel->nir), false, true);
/* Since we're doing nir_lower_io_to_temporaries late, we need
* to lower all the copy_deref's introduced by
* lower_io_to_temporaries before calling nir_lower_io.
*/
NIR_PASS_V(sel->nir, nir_split_var_copies);
NIR_PASS_V(sel->nir, nir_lower_var_copies);
NIR_PASS_V(sel->nir, nir_lower_global_vars_to_local);
si_nir_lower_color(sel->nir);
NIR_PASS_V(sel->nir, nir_lower_io, nir_var_shader_in, type_size_vec4, 0);
/* This pass needs actual constants */
NIR_PASS_V(sel->nir, nir_opt_constant_folding);
NIR_PASS_V(sel->nir, nir_io_add_const_offset_to_base,
nir_var_shader_in);
}
nir_foreach_variable(variable, &sel->nir->outputs) { nir_foreach_variable(variable, &sel->nir->outputs) {
variable->data.driver_location *= 4; variable->data.driver_location *= 4;
@ -924,24 +985,6 @@ static void declare_nir_input_vs(struct si_shader_context *ctx,
si_llvm_load_input_vs(ctx, input_index, out); si_llvm_load_input_vs(ctx, input_index, out);
} }
static void declare_nir_input_fs(struct si_shader_context *ctx,
struct nir_variable *variable,
unsigned input_index,
LLVMValueRef out[4])
{
unsigned slot = variable->data.location;
if (slot == VARYING_SLOT_POS) {
out[0] = LLVMGetParam(ctx->main_fn, SI_PARAM_POS_X_FLOAT);
out[1] = LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Y_FLOAT);
out[2] = LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Z_FLOAT);
out[3] = ac_build_fdiv(&ctx->ac, ctx->ac.f32_1,
LLVMGetParam(ctx->main_fn, SI_PARAM_POS_W_FLOAT));
return;
}
si_llvm_load_input_fs(ctx, input_index, out);
}
LLVMValueRef LLVMValueRef
si_nir_lookup_interp_param(struct ac_shader_abi *abi, si_nir_lookup_interp_param(struct ac_shader_abi *abi,
enum glsl_interp_mode interp, unsigned location) enum glsl_interp_mode interp, unsigned location)
@ -1067,20 +1110,16 @@ bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *nir)
{ {
struct tgsi_shader_info *info = &ctx->shader->selector->info; struct tgsi_shader_info *info = &ctx->shader->selector->info;
if (nir->info.stage == MESA_SHADER_VERTEX || if (nir->info.stage == MESA_SHADER_VERTEX) {
nir->info.stage == MESA_SHADER_FRAGMENT) {
uint64_t processed_inputs = 0; uint64_t processed_inputs = 0;
nir_foreach_variable(variable, &nir->inputs) { nir_foreach_variable(variable, &nir->inputs) {
unsigned attrib_count = glsl_count_attribute_slots(variable->type, unsigned attrib_count = glsl_count_attribute_slots(variable->type,
nir->info.stage == MESA_SHADER_VERTEX); true);
unsigned input_idx = variable->data.driver_location; unsigned input_idx = variable->data.driver_location;
LLVMValueRef data[4]; LLVMValueRef data[4];
unsigned loc = variable->data.location; unsigned loc = variable->data.location;
if (loc >= VARYING_SLOT_VAR0 && nir->info.stage == MESA_SHADER_FRAGMENT)
ctx->abi.fs_input_attr_indices[loc - VARYING_SLOT_VAR0] = input_idx / 4;
for (unsigned i = 0; i < attrib_count; i++) { for (unsigned i = 0; i < attrib_count; i++) {
/* Packed components share the same location so skip /* Packed components share the same location so skip
* them if we have already processed the location. * them if we have already processed the location.
@ -1090,24 +1129,50 @@ bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *nir)
continue; continue;
} }
if (nir->info.stage == MESA_SHADER_VERTEX) { declare_nir_input_vs(ctx, variable, input_idx / 4, data);
bitcast_inputs(ctx, data, input_idx);
if (glsl_type_is_dual_slot(variable->type)) {
input_idx += 4;
declare_nir_input_vs(ctx, variable, input_idx / 4, data); declare_nir_input_vs(ctx, variable, input_idx / 4, data);
bitcast_inputs(ctx, data, input_idx); bitcast_inputs(ctx, data, input_idx);
if (glsl_type_is_dual_slot(variable->type)) {
input_idx += 4;
declare_nir_input_vs(ctx, variable, input_idx / 4, data);
bitcast_inputs(ctx, data, input_idx);
}
} else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
declare_nir_input_fs(ctx, variable, input_idx / 4, data);
bitcast_inputs(ctx, data, input_idx);
} }
processed_inputs |= ((uint64_t)1 << (loc + i)); processed_inputs |= ((uint64_t)1 << (loc + i));
input_idx += 4; input_idx += 4;
} }
} }
} } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
unsigned colors_read =
ctx->shader->selector->info.colors_read;
LLVMValueRef main_fn = ctx->main_fn;
LLVMValueRef undef = LLVMGetUndef(ctx->f32);
unsigned offset = SI_PARAM_POS_FIXED_PT + 1;
if (colors_read & 0x0f) {
unsigned mask = colors_read & 0x0f;
LLVMValueRef values[4];
values[0] = mask & 0x1 ? LLVMGetParam(main_fn, offset++) : undef;
values[1] = mask & 0x2 ? LLVMGetParam(main_fn, offset++) : undef;
values[2] = mask & 0x4 ? LLVMGetParam(main_fn, offset++) : undef;
values[3] = mask & 0x8 ? LLVMGetParam(main_fn, offset++) : undef;
ctx->abi.color0 =
ac_to_integer(&ctx->ac,
ac_build_gather_values(&ctx->ac, values, 4));
}
if (colors_read & 0xf0) {
unsigned mask = (colors_read & 0xf0) >> 4;
LLVMValueRef values[4];
values[0] = mask & 0x1 ? LLVMGetParam(main_fn, offset++) : undef;
values[1] = mask & 0x2 ? LLVMGetParam(main_fn, offset++) : undef;
values[2] = mask & 0x4 ? LLVMGetParam(main_fn, offset++) : undef;
values[3] = mask & 0x8 ? LLVMGetParam(main_fn, offset++) : undef;
ctx->abi.color1 =
ac_to_integer(&ctx->ac,
ac_build_gather_values(&ctx->ac, values, 4));
}
}
ctx->abi.inputs = &ctx->inputs[0]; ctx->abi.inputs = &ctx->inputs[0];
ctx->abi.load_sampler_desc = si_nir_load_sampler_desc; ctx->abi.load_sampler_desc = si_nir_load_sampler_desc;