freedreno/ir3: handle flat bypass for a4xx

We may not need this for later a4xx patchlevels, but we do at least need
this for patchlevel 0.  Bypass bary.f for fetching varyings when flat
shading is needed (rather than configuring it via cmdstream).  This requires
a special dummy bary.f w/ (ei) flag to signal to the scheduler when all
varyings are consumed.  It also requires shader variants based on rasterizer
flatshade state to handle TGSI_INTERPOLATE_COLOR.

Signed-off-by: Rob Clark <robclark@freedesktop.org>
This commit is contained in:
Rob Clark 2015-02-25 13:54:25 -05:00
parent 9d732d3125
commit e9f2abe349
8 changed files with 99 additions and 5 deletions

View File

@ -100,6 +100,9 @@ fixup_shader_state(struct fd_context *ctx, struct ir3_shader_key *key)
if (last_key->alpha != key->alpha)
ctx->prog.dirty |= FD_SHADER_DIRTY_FP;
if (last_key->rasterflat != key->rasterflat)
ctx->prog.dirty |= FD_SHADER_DIRTY_FP;
fd4_ctx->last_key = *key;
}
}
@ -118,6 +121,7 @@ fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
.binning_pass = true,
.color_two_side = ctx->rasterizer ? ctx->rasterizer->light_twoside : false,
.alpha = util_format_is_alpha(pipe_surface_format(pfb->cbufs[0])),
.rasterflat = ctx->rasterizer && ctx->rasterizer->flatshade,
// TODO set .half_precision based on render target format,
// ie. float16 and smaller use half, float32 use full..
.half_precision = !!(fd_mesa_debug & FD_DBG_FRAGHALF),
@ -130,7 +134,6 @@ fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
.fsaturate_r = fd4_ctx->fsaturate_r,
},
.format = fd4_emit_format(pfb->cbufs[0]),
.rasterflat = ctx->rasterizer && ctx->rasterizer->flatshade,
};
unsigned dirty;

View File

@ -55,7 +55,6 @@ struct fd4_emit {
struct ir3_shader_key key;
enum a4xx_color_fmt format;
uint32_t dirty;
bool rasterflat;
/* cached to avoid repeated lookups of same variants: */
struct ir3_shader_variant *vp, *fp;

View File

@ -113,6 +113,8 @@ static uint32_t reg(struct ir3_register *reg, struct ir3_info *info,
if (reg->flags & IR3_REG_CONST) {
info->max_const = MAX2(info->max_const, max);
} else if (val.num == 63) {
/* ignore writes to dummy register r63.x */
} else if ((max != REG_A0) && (max != REG_P0)) {
if (reg->flags & IR3_REG_HALF) {
info->max_half_reg = MAX2(info->max_half_reg, max);

View File

@ -427,6 +427,12 @@ static inline bool is_mem(struct ir3_instruction *instr)
/* Report whether an instruction fetches a varying input: either an
 * ldlv (memory-category) instruction or a category 2 bary.f.
 */
static inline bool is_input(struct ir3_instruction *instr)
{
	/* ldlv is sometimes used to fetch a varying without
	 * interpolation.. conveniently, inloc is the first src
	 * register for both ldlv and bary.f
	 */
	return (is_mem(instr) && (instr->opc == OPC_LDLV)) ||
		((instr->category == 2) && (instr->opc == OPC_BARY_F));
}

View File

@ -105,6 +105,11 @@ struct ir3_compile_context {
/* for calculating input/output positions/linkages: */
unsigned next_inloc;
/* a4xx (at least patchlevel 0) cannot seem to flat-interpolate
* so we need to use ldlv.u32 to load the varying directly:
*/
bool flat_bypass;
unsigned num_internal_temps;
struct tgsi_src_register internal_temps[8];
@ -204,9 +209,13 @@ compile_init(struct ir3_compile_context *ctx, struct ir3_shader_variant *so,
} else if (ir3_shader_gpuid(so->shader) >= 400) {
/* a4xx seems to have *no* sam.p */
lconfig.lower_TXP = ~0; /* lower all txp */
/* need special handling for "flat" */
ctx->flat_bypass = true;
} else {
/* a3xx just needs to avoid sam.p for 3d tex */
lconfig.lower_TXP = (1 << TGSI_TEXTURE_3D);
/* no special handling for "flat" */
ctx->flat_bypass = false;
}
ctx->tokens = tgsi_transform_lowering(&lconfig, tokens, &ctx->info);
@ -2745,11 +2754,23 @@ decl_semantic(const struct tgsi_declaration_semantic *sem)
static struct ir3_instruction *
decl_in_frag_bary(struct ir3_compile_context *ctx, unsigned regid,
unsigned j, unsigned inloc)
unsigned j, unsigned inloc, bool use_ldlv)
{
struct ir3_instruction *instr;
struct ir3_register *src;
if (use_ldlv) {
/* ldlv.u32 dst, l[#inloc], 1 */
instr = instr_create(ctx, 6, OPC_LDLV);
instr->cat6.type = TYPE_U32;
instr->cat6.iim_val = 1;
ir3_reg_create(instr, regid, 0); /* dummy dst */
ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = inloc;
ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 1;
return instr;
}
/* bary.f dst, #inloc, r0.x */
instr = instr_create(ctx, 2, OPC_BARY_F);
ir3_reg_create(instr, regid, 0); /* dummy dst */
@ -2943,9 +2964,31 @@ decl_in(struct ir3_compile_context *ctx, struct tgsi_full_declaration *decl)
so->frag_face = true;
instr = decl_in_frag_face(ctx, r + j, j);
} else {
bool use_ldlv = false;
/* I don't believe it is valid to not have Interp
* on a normal frag shader input, and various parts
* that handle flat/smooth shading make this
* assumption as well.
*/
compile_assert(ctx, decl->Declaration.Interpolate);
if (ctx->flat_bypass) {
switch (decl->Interp.Interpolate) {
case TGSI_INTERPOLATE_COLOR:
if (!ctx->so->key.rasterflat)
break;
/* fallthrough */
case TGSI_INTERPOLATE_CONSTANT:
use_ldlv = true;
break;
}
}
so->inputs[n].bary = true;
instr = decl_in_frag_bary(ctx, r + j, j,
so->inputs[n].inloc + j - 8);
so->inputs[n].inloc + j - 8, use_ldlv);
}
} else {
instr = create_input(ctx->block, NULL, (i * 4) + j);

View File

@ -187,8 +187,44 @@ static void legalize(struct ir3_legalize_ctx *ctx)
last_input = n;
}
if (last_input)
if (last_input) {
/* special hack.. if using ldlv to bypass interpolation,
* we need to insert a dummy bary.f on which we can set
* the (ei) flag:
*/
if (is_mem(last_input) && (last_input->opc == OPC_LDLV)) {
int i, cnt;
/* note that ir3_instr_create() inserts into
* shader->instrs[] and increments the count..
* so we need to bump up the cnt initially (to
* avoid it clobbering the last real instr) and
* restore it after.
*/
cnt = ++shader->instrs_count;
/* inserting instructions would be a bit nicer if list.. */
for (i = cnt - 2; i >= 0; i--) {
if (shader->instrs[i] == last_input) {
/* (ss)bary.f (ei)r63.x, 0, r0.x */
last_input = ir3_instr_create(block, 2, OPC_BARY_F);
last_input->flags |= IR3_INSTR_SS;
ir3_reg_create(last_input, regid(63, 0), 0);
ir3_reg_create(last_input, 0, IR3_REG_IMMED)->iim_val = 0;
ir3_reg_create(last_input, regid(0, 0), 0);
shader->instrs[i + 1] = last_input;
break;
}
shader->instrs[i + 1] = shader->instrs[i];
}
shader->instrs_count = cnt;
}
last_input->regs[0]->flags |= IR3_REG_EI;
}
if (last_rel)
last_rel->flags |= IR3_INSTR_UL;

View File

@ -246,6 +246,7 @@ ir3_shader_variant(struct ir3_shader *shader, struct ir3_shader_key key)
key.color_two_side = false;
key.half_precision = false;
key.alpha = false;
key.rasterflat = false;
if (key.has_per_samp) {
key.fsaturate_s = 0;
key.fsaturate_t = 0;

View File

@ -77,6 +77,10 @@ struct ir3_shader_key {
* let's start with this and see how it goes:
*/
unsigned alpha : 1;
/* used when shader needs to handle flat varyings (a4xx),
* for TGSI_INTERPOLATE_COLOR:
*/
unsigned rasterflat : 1;
};
uint32_t global;
};