radeonsi: add PS prolog

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
This commit is contained in:
Marek Olšák 2016-02-15 23:57:54 +01:00
parent e79bb746ab
commit 4636d9be4a
5 changed files with 345 additions and 2 deletions

View File

@ -542,6 +542,7 @@ static void si_destroy_screen(struct pipe_screen* pscreen)
sscreen->vs_prologs,
sscreen->vs_epilogs,
sscreen->tcs_epilogs,
sscreen->ps_prologs,
sscreen->ps_epilogs
};
unsigned i;

View File

@ -92,6 +92,7 @@ struct si_screen {
struct si_shader_part *vs_prologs;
struct si_shader_part *vs_epilogs;
struct si_shader_part *tcs_epilogs;
struct si_shader_part *ps_prologs;
struct si_shader_part *ps_epilogs;
};

View File

@ -879,7 +879,8 @@ static int lookup_interp_param_index(unsigned interpolate, unsigned location)
static unsigned select_interp_param(struct si_shader_context *ctx,
unsigned param)
{
if (!ctx->shader->key.ps.prolog.force_persample_interp)
if (!ctx->shader->key.ps.prolog.force_persample_interp ||
!ctx->is_monolithic)
return param;
/* If the shader doesn't use center/centroid, just return the parameter.
@ -1023,6 +1024,7 @@ static void declare_input_fs(
unsigned input_index,
const struct tgsi_full_declaration *decl)
{
struct lp_build_context *base = &radeon_bld->soa.bld_base.base;
struct si_shader_context *ctx =
si_shader_context(&radeon_bld->soa.bld_base);
struct si_shader *shader = ctx->shader;
@ -1030,6 +1032,26 @@ static void declare_input_fs(
LLVMValueRef interp_param = NULL;
int interp_param_idx;
/* Get colors from input VGPRs (set by the prolog). */
if (!ctx->is_monolithic &&
decl->Semantic.Name == TGSI_SEMANTIC_COLOR) {
unsigned i = decl->Semantic.Index;
unsigned colors_read = shader->selector->info.colors_read;
unsigned mask = colors_read >> (i * 4);
unsigned offset = SI_PARAM_POS_FIXED_PT + 1 +
(i ? util_bitcount(colors_read & 0xf) : 0);
radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 0)] =
mask & 0x1 ? LLVMGetParam(main_fn, offset++) : base->undef;
radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 1)] =
mask & 0x2 ? LLVMGetParam(main_fn, offset++) : base->undef;
radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 2)] =
mask & 0x4 ? LLVMGetParam(main_fn, offset++) : base->undef;
radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 3)] =
mask & 0x8 ? LLVMGetParam(main_fn, offset++) : base->undef;
return;
}
interp_param_idx = lookup_interp_param_index(decl->Interp.Interpolate,
decl->Interp.Location);
if (interp_param_idx == -1)
@ -3970,6 +3992,16 @@ static void create_function(struct si_shader_context *ctx)
num_params = SI_PARAM_POS_FIXED_PT+1;
if (!ctx->is_monolithic) {
/* Color inputs from the prolog. */
if (shader->selector->info.colors_read) {
unsigned num_color_elements =
util_bitcount(shader->selector->info.colors_read);
assert(num_params + num_color_elements <= ARRAY_SIZE(params));
for (i = 0; i < num_color_elements; i++)
params[num_params++] = ctx->f32;
}
/* Outputs for the epilog. */
num_return_sgprs = SI_SGPR_ALPHA_REF + 1;
num_returns =
@ -4001,6 +4033,20 @@ static void create_function(struct si_shader_context *ctx)
si_create_function(ctx, returns, num_returns, params,
num_params, last_array_pointer, last_sgpr);
/* Reserve register locations for VGPR inputs the PS prolog may need. */
if (ctx->type == TGSI_PROCESSOR_FRAGMENT &&
!ctx->is_monolithic) {
radeon_llvm_add_attribute(ctx->radeon_bld.main_fn,
"InitialPSInputAddr",
S_0286D0_PERSP_SAMPLE_ENA(1) |
S_0286D0_PERSP_CENTER_ENA(1) |
S_0286D0_PERSP_CENTROID_ENA(1) |
S_0286D0_LINEAR_SAMPLE_ENA(1) |
S_0286D0_LINEAR_CENTER_ENA(1) |
S_0286D0_LINEAR_CENTROID_ENA(1) |
S_0286D0_FRONT_FACE_ENA(1));
}
shader->num_input_sgprs = 0;
shader->num_input_vgprs = 0;
@ -5304,6 +5350,157 @@ static bool si_shader_select_tcs_parts(struct si_screen *sscreen,
return shader->epilog != NULL;
}
/**
 * Compile the pixel shader prolog. This handles:
 * - two-side color selection and interpolation
 * - overriding interpolation parameters for the API PS
 * - polygon stippling (not implemented yet, see the TODO below)
 *
 * All preloaded SGPRs and VGPRs are passed through unmodified unless they are
 * overridden by other states. (e.g. per-sample interpolation)
 * Interpolated colors are stored after the preloaded VGPRs.
 *
 * \param sscreen  screen, forwarded to shader-context setup and to the
 *                 LLVM compile step
 * \param tm       LLVM target machine used for compilation
 * \param debug    debug callback forwarded to si_compile_llvm
 * \param out      shader part; out->key describes the prolog to build,
 *                 out->binary and out->config receive the compiled result
 * \return true on success, false if si_compile_llvm reported an error
 */
static bool si_compile_ps_prolog(struct si_screen *sscreen,
				 LLVMTargetMachineRef tm,
				 struct pipe_debug_callback *debug,
				 struct si_shader_part *out)
{
	union si_shader_part_key *key = &out->key;
	struct si_shader shader = {};
	struct si_shader_context ctx;
	struct gallivm_state *gallivm = &ctx.radeon_bld.gallivm;
	LLVMTypeRef *params;
	LLVMValueRef ret, func;
	int last_sgpr, num_params, num_returns, i, num_color_channels;
	bool status = true;

	si_init_shader_ctx(&ctx, sscreen, &shader, tm, NULL);
	ctx.type = TGSI_PROCESSOR_FRAGMENT;
	shader.key.ps.prolog = key->ps_prolog.states;

	/* Number of inputs + 8 color elements. */
	params = alloca((key->ps_prolog.num_input_sgprs +
			 key->ps_prolog.num_input_vgprs + 8) *
			sizeof(LLVMTypeRef));

	/* Declare inputs. */
	num_params = 0;
	for (i = 0; i < key->ps_prolog.num_input_sgprs; i++)
		params[num_params++] = ctx.i32;
	last_sgpr = num_params - 1;

	for (i = 0; i < key->ps_prolog.num_input_vgprs; i++)
		params[num_params++] = ctx.f32;

	/* Declare outputs (same as inputs + add colors if needed).
	 * The return types share the params array: the color types are
	 * appended past num_params, so only the first num_params entries
	 * are used as parameter types below.
	 */
	num_returns = num_params;
	num_color_channels = util_bitcount(key->ps_prolog.colors_read);
	for (i = 0; i < num_color_channels; i++)
		params[num_returns++] = ctx.f32;

	/* Create the function. */
	si_create_function(&ctx, params, num_returns, params,
			   num_params, -1, last_sgpr);
	func = ctx.radeon_bld.main_fn;

	/* Copy inputs to outputs. This should be no-op, as the registers match,
	 * but it will prevent the compiler from overwriting them unintentionally.
	 */
	ret = ctx.return_value;
	for (i = 0; i < num_params; i++) {
		LLVMValueRef p = LLVMGetParam(func, i);
		ret = LLVMBuildInsertValue(gallivm->builder, ret, p, i, "");
	}

	/* Interpolate colors. */
	for (i = 0; i < 2; i++) {
		unsigned writemask = (key->ps_prolog.colors_read >> (i * 4)) & 0xf;
		unsigned face_vgpr = key->ps_prolog.num_input_sgprs +
				     key->ps_prolog.face_vgpr_index;
		LLVMValueRef interp[2], color[4];
		LLVMValueRef interp_ij = NULL, prim_mask = NULL, face = NULL;

		if (!writemask)
			continue;

		/* If the interpolation qualifier is not CONSTANT (-1).
		 *
		 * The key field is a plain char, whose signedness is
		 * implementation-defined. Where char is unsigned the -1
		 * sentinel reads back as 255 and a direct comparison with -1
		 * is always true, so compare through signed char.
		 */
		if ((signed char)key->ps_prolog.color_interp_vgpr_index[i] != -1) {
			unsigned interp_vgpr = key->ps_prolog.num_input_sgprs +
					       key->ps_prolog.color_interp_vgpr_index[i];

			/* The two weights sit in consecutive parameters. */
			interp[0] = LLVMGetParam(func, interp_vgpr);
			interp[1] = LLVMGetParam(func, interp_vgpr + 1);
			interp_ij = lp_build_gather_values(gallivm, interp, 2);
			interp_ij = LLVMBuildBitCast(gallivm->builder, interp_ij,
						     ctx.v2i32, "");
		}

		/* Use the absolute location of the input. */
		prim_mask = LLVMGetParam(func, SI_PS_NUM_USER_SGPR);

		/* The face value is only fetched for two-side color selection;
		 * otherwise it is passed to interp_fs_input as NULL.
		 */
		if (key->ps_prolog.states.color_two_side) {
			face = LLVMGetParam(func, face_vgpr);
			face = LLVMBuildBitCast(gallivm->builder, face, ctx.i32, "");
		}

		interp_fs_input(&ctx,
				key->ps_prolog.color_attr_index[i],
				TGSI_SEMANTIC_COLOR, i,
				key->ps_prolog.num_interp_inputs,
				key->ps_prolog.colors_read, interp_ij,
				prim_mask, face, color);

		/* Append only the channels that are read. From here on
		 * num_params is the index of the next free return slot.
		 */
		while (writemask) {
			unsigned chan = u_bit_scan(&writemask);
			ret = LLVMBuildInsertValue(gallivm->builder, ret, color[chan],
						   num_params++, "");
		}
	}

	/* Force per-sample interpolation.
	 *
	 * Offsets below are relative to the first VGPR parameter; each
	 * weight pair occupies two consecutive parameters.
	 */
	if (key->ps_prolog.states.force_persample_interp) {
		unsigned base = key->ps_prolog.num_input_sgprs;
		unsigned j;
		LLVMValueRef persp_sample[2], linear_sample[2];

		/* Read PERSP_SAMPLE. */
		for (j = 0; j < 2; j++)
			persp_sample[j] = LLVMGetParam(func, base + j);
		/* Overwrite PERSP_CENTER. */
		for (j = 0; j < 2; j++)
			ret = LLVMBuildInsertValue(gallivm->builder, ret,
						   persp_sample[j], base + 2 + j, "");
		/* Overwrite PERSP_CENTROID. */
		for (j = 0; j < 2; j++)
			ret = LLVMBuildInsertValue(gallivm->builder, ret,
						   persp_sample[j], base + 4 + j, "");
		/* Read LINEAR_SAMPLE. */
		for (j = 0; j < 2; j++)
			linear_sample[j] = LLVMGetParam(func, base + 6 + j);
		/* Overwrite LINEAR_CENTER. */
		for (j = 0; j < 2; j++)
			ret = LLVMBuildInsertValue(gallivm->builder, ret,
						   linear_sample[j], base + 8 + j, "");
		/* Overwrite LINEAR_CENTROID. */
		for (j = 0; j < 2; j++)
			ret = LLVMBuildInsertValue(gallivm->builder, ret,
						   linear_sample[j], base + 10 + j, "");
	}

	/* TODO: polygon stippling */

	/* Compile. */
	LLVMBuildRet(gallivm->builder, ret);
	radeon_llvm_finalize_module(&ctx.radeon_bld);

	/* A non-zero result from si_compile_llvm is treated as failure. */
	if (si_compile_llvm(sscreen, &out->binary, &out->config, tm,
			    gallivm->module, debug, ctx.type,
			    "Fragment Shader Prolog"))
		status = false;

	radeon_llvm_dispose(&ctx.radeon_bld);
	return status;
}
/**
* Compile the pixel shader epilog. This handles everything that must be
* emulated for pixel shader exports. (alpha-test, format conversions, etc)
@ -5430,7 +5627,103 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen,
struct pipe_debug_callback *debug)
{
struct tgsi_shader_info *info = &shader->selector->info;
union si_shader_part_key prolog_key;
union si_shader_part_key epilog_key;
unsigned i;
/* Get the prolog. */
memset(&prolog_key, 0, sizeof(prolog_key));
prolog_key.ps_prolog.states = shader->key.ps.prolog;
prolog_key.ps_prolog.colors_read = info->colors_read;
prolog_key.ps_prolog.num_input_sgprs = shader->num_input_sgprs;
prolog_key.ps_prolog.num_input_vgprs = shader->num_input_vgprs;
if (info->colors_read) {
unsigned *color = shader->selector->color_attr_index;
if (shader->key.ps.prolog.color_two_side) {
/* BCOLORs are stored after the last input. */
prolog_key.ps_prolog.num_interp_inputs = info->num_inputs;
prolog_key.ps_prolog.face_vgpr_index = shader->face_vgpr_index;
shader->config.spi_ps_input_ena |= S_0286CC_FRONT_FACE_ENA(1);
}
for (i = 0; i < 2; i++) {
unsigned location = info->input_interpolate_loc[color[i]];
if (!(info->colors_read & (0xf << i*4)))
continue;
prolog_key.ps_prolog.color_attr_index[i] = color[i];
/* Force per-sample interpolation for the colors here. */
if (shader->key.ps.prolog.force_persample_interp)
location = TGSI_INTERPOLATE_LOC_SAMPLE;
switch (info->input_interpolate[color[i]]) {
case TGSI_INTERPOLATE_CONSTANT:
prolog_key.ps_prolog.color_interp_vgpr_index[i] = -1;
break;
case TGSI_INTERPOLATE_PERSPECTIVE:
case TGSI_INTERPOLATE_COLOR:
switch (location) {
case TGSI_INTERPOLATE_LOC_SAMPLE:
prolog_key.ps_prolog.color_interp_vgpr_index[i] = 0;
shader->config.spi_ps_input_ena |=
S_0286CC_PERSP_SAMPLE_ENA(1);
break;
case TGSI_INTERPOLATE_LOC_CENTER:
prolog_key.ps_prolog.color_interp_vgpr_index[i] = 2;
shader->config.spi_ps_input_ena |=
S_0286CC_PERSP_CENTER_ENA(1);
break;
case TGSI_INTERPOLATE_LOC_CENTROID:
prolog_key.ps_prolog.color_interp_vgpr_index[i] = 4;
shader->config.spi_ps_input_ena |=
S_0286CC_PERSP_CENTROID_ENA(1);
break;
default:
assert(0);
}
break;
case TGSI_INTERPOLATE_LINEAR:
switch (location) {
case TGSI_INTERPOLATE_LOC_SAMPLE:
prolog_key.ps_prolog.color_interp_vgpr_index[i] = 6;
shader->config.spi_ps_input_ena |=
S_0286CC_LINEAR_SAMPLE_ENA(1);
break;
case TGSI_INTERPOLATE_LOC_CENTER:
prolog_key.ps_prolog.color_interp_vgpr_index[i] = 8;
shader->config.spi_ps_input_ena |=
S_0286CC_LINEAR_CENTER_ENA(1);
break;
case TGSI_INTERPOLATE_LOC_CENTROID:
prolog_key.ps_prolog.color_interp_vgpr_index[i] = 10;
shader->config.spi_ps_input_ena |=
S_0286CC_LINEAR_CENTROID_ENA(1);
break;
default:
assert(0);
}
break;
default:
assert(0);
}
}
}
/* The prolog is a no-op if these aren't set. */
if (prolog_key.ps_prolog.colors_read ||
prolog_key.ps_prolog.states.force_persample_interp ||
prolog_key.ps_prolog.states.poly_stipple) {
shader->prolog =
si_get_shader_part(sscreen, &sscreen->ps_prologs,
&prolog_key, tm, debug,
si_compile_ps_prolog);
if (!shader->prolog)
return false;
}
/* Get the epilog. */
memset(&epilog_key, 0, sizeof(epilog_key));
@ -5447,6 +5740,35 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen,
if (!shader->epilog)
return false;
/* Set up the enable bits for per-sample shading if needed. */
if (shader->key.ps.prolog.force_persample_interp) {
if (G_0286CC_PERSP_CENTER_ENA(shader->config.spi_ps_input_ena) ||
G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_ena)) {
shader->config.spi_ps_input_ena &= C_0286CC_PERSP_CENTER_ENA;
shader->config.spi_ps_input_ena &= C_0286CC_PERSP_CENTROID_ENA;
shader->config.spi_ps_input_ena |= S_0286CC_PERSP_SAMPLE_ENA(1);
}
if (G_0286CC_LINEAR_CENTER_ENA(shader->config.spi_ps_input_ena) ||
G_0286CC_LINEAR_CENTROID_ENA(shader->config.spi_ps_input_ena)) {
shader->config.spi_ps_input_ena &= C_0286CC_LINEAR_CENTER_ENA;
shader->config.spi_ps_input_ena &= C_0286CC_LINEAR_CENTROID_ENA;
shader->config.spi_ps_input_ena |= S_0286CC_LINEAR_SAMPLE_ENA(1);
}
}
/* POS_W_FLOAT requires that one of the perspective weights is enabled. */
if (G_0286CC_POS_W_FLOAT_ENA(shader->config.spi_ps_input_ena) &&
!(shader->config.spi_ps_input_ena & 0xf)) {
shader->config.spi_ps_input_ena |= S_0286CC_PERSP_CENTER_ENA(1);
assert(G_0286CC_PERSP_CENTER_ENA(shader->config.spi_ps_input_addr));
}
/* At least one pair of interpolation weights must be enabled. */
if (!(shader->config.spi_ps_input_ena & 0x7f)) {
shader->config.spi_ps_input_ena |= S_0286CC_LINEAR_CENTER_ENA(1);
assert(G_0286CC_LINEAR_CENTER_ENA(shader->config.spi_ps_input_addr));
}
/* The sample mask input is always enabled, because the API shader always
* passes it through to the epilog. Disable it here if it's unused.
*/

View File

@ -169,7 +169,7 @@ struct radeon_shader_reloc;
#define SI_PARAM_SAMPLE_COVERAGE 20
#define SI_PARAM_POS_FIXED_PT 21
#define SI_NUM_PARAMS (SI_PARAM_POS_FIXED_PT + 1)
#define SI_NUM_PARAMS (SI_PARAM_POS_FIXED_PT + 9) /* +8 for COLOR[0..1] */
struct si_shader;
@ -199,6 +199,7 @@ struct si_shader_selector {
unsigned max_gsvs_emit_size;
/* PS parameters. */
unsigned color_attr_index[2];
unsigned db_shader_control;
/* Set 0xf or 0x0 (4 bits) per each written output.
* ANDed with spi_shader_col_format.
@ -281,6 +282,17 @@ union si_shader_part_key {
struct {
struct si_tcs_epilog_bits states;
} tcs_epilog;
struct {
struct si_ps_prolog_bits states;
unsigned num_input_sgprs:5;
unsigned num_input_vgprs:5;
/* Color interpolation and two-side color selection. */
unsigned colors_read:8; /* color input components read */
unsigned num_interp_inputs:5; /* BCOLOR is at this location */
unsigned face_vgpr_index:5;
char color_attr_index[2];
char color_interp_vgpr_index[2]; /* -1 == constant */
} ps_prolog;
struct {
struct si_ps_epilog_bits states;
unsigned colors_written:8;

View File

@ -903,6 +903,13 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
for (i = 0; i < 8; i++)
if (sel->info.colors_written & (1 << i))
sel->colors_written_4bit |= 0xf << (4 * i);
for (i = 0; i < sel->info.num_inputs; i++) {
if (sel->info.input_semantic_name[i] == TGSI_SEMANTIC_COLOR) {
int index = sel->info.input_semantic_index[i];
sel->color_attr_index[index] = i;
}
}
break;
}