llvmpipe: defer attribute interpolation until after mask and ztest

Don't calculate 1/w for quads which aren't visible...
This commit is contained in:
Keith Whitwell 2010-10-06 22:25:48 +01:00
parent d0bfb3c514
commit 8009886b00
3 changed files with 34 additions and 14 deletions

View File

@ -272,7 +272,10 @@ coeffs_init(struct lp_build_interp_soa_context *bld,
* This is called when we move from one quad to the next.
*/
static void
attribs_update(struct lp_build_interp_soa_context *bld, int quad_index)
attribs_update(struct lp_build_interp_soa_context *bld,
int quad_index,
int start,
int end)
{
struct lp_build_context *coeff_bld = &bld->coeff_bld;
LLVMValueRef shuffle = lp_build_const_int_vec(coeff_bld->type, quad_index);
@ -282,7 +285,7 @@ attribs_update(struct lp_build_interp_soa_context *bld, int quad_index)
assert(quad_index < 4);
for(attrib = 0; attrib < bld->num_attribs; ++attrib) {
for(attrib = start; attrib < end; ++attrib) {
const unsigned mask = bld->mask[attrib];
const unsigned interp = bld->interp[attrib];
for(chan = 0; chan < NUM_CHANNELS; ++chan) {
@ -442,8 +445,6 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
pos_init(bld, x0, y0);
coeffs_init(bld, a0_ptr, dadx_ptr, dady_ptr);
attribs_update(bld, 0);
}
@ -451,10 +452,20 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
* Advance the inputs (every attribute slot except slot 0) to the given quad within the block.
*/
void
lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld,
int quad_index)
lp_build_interp_soa_update_inputs(struct lp_build_interp_soa_context *bld,
int quad_index)
{
/* Only quad indices below 4 are accepted. */
assert(quad_index < 4);
attribs_update(bld, quad_index);
/* Start at slot 1: slot 0 is updated separately by
 * lp_build_interp_soa_update_pos(), which covers the range [0, 1).
 */
attribs_update(bld, quad_index, 1, bld->num_attribs);
}
/**
 * Advance only attribute slot 0 to the given quad within the block.
 *
 * Runs attribs_update() over the half-open attribute range [0, 1), leaving
 * the remaining slots to lp_build_interp_soa_update_inputs().
 * NOTE(review): slot 0 is presumably the fragment position, going by this
 * function's name -- confirm against the attribute layout.
 *
 * \param bld         interpolation context to update
 * \param quad_index  index of the quad within the block; must be below 4
 */
void
lp_build_interp_soa_update_pos(struct lp_build_interp_soa_context *bld,
int quad_index)
{
assert(quad_index < 4);
attribs_update(bld, quad_index, 0, 1);
}

View File

@ -89,7 +89,11 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
LLVMValueRef y);
/* Advance attribute slots 1 and up to the given quad (quad_index < 4). */
void
lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld,
lp_build_interp_soa_update_inputs(struct lp_build_interp_soa_context *bld,
int quad_index);
/* Advance attribute slot 0 only to the given quad (quad_index < 4). */
void
lp_build_interp_soa_update_pos(struct lp_build_interp_soa_context *bld,
int quad_index);

View File

@ -262,7 +262,7 @@ generate_fs(struct llvmpipe_context *lp,
struct lp_type type,
LLVMValueRef context_ptr,
unsigned i,
const struct lp_build_interp_soa_context *interp,
struct lp_build_interp_soa_context *interp,
struct lp_build_sampler_soa *sampler,
LLVMValueRef *pmask,
LLVMValueRef (*color)[4],
@ -276,7 +276,7 @@ generate_fs(struct llvmpipe_context *lp,
LLVMTypeRef vec_type;
LLVMValueRef consts_ptr;
LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
LLVMValueRef z = interp->pos[2];
LLVMValueRef z;
LLVMValueRef stencil_refs[2];
struct lp_build_flow_context *flow;
struct lp_build_mask_context mask;
@ -307,7 +307,6 @@ generate_fs(struct llvmpipe_context *lp,
lp_build_flow_scope_declare(flow, &color[cbuf][chan]);
}
}
lp_build_flow_scope_declare(flow, &z);
/* do triangle edge testing */
if (partial_mask) {
@ -321,6 +320,13 @@ generate_fs(struct llvmpipe_context *lp,
/* 'mask' will control execution based on quad's pixel alive/killed state */
lp_build_mask_begin(&mask, flow, type, *pmask);
lp_build_interp_soa_update_pos(interp, i);
/* Try to avoid the 1/w for quads where mask is zero. TODO: avoid
* this for depth-fail quads also.
*/
z = interp->pos[2];
early_depth_stencil_test =
(key->depth.enabled || key->stencil[0].enabled) &&
!key->alpha.enabled &&
@ -332,6 +338,8 @@ generate_fs(struct llvmpipe_context *lp,
type, &mask,
stencil_refs, z, depth_ptr, facing, counter);
lp_build_interp_soa_update_inputs(interp, i);
lp_build_tgsi_soa(builder, tokens, type, &mask,
consts_ptr, interp->pos, interp->inputs,
outputs, sampler, &shader->info);
@ -621,9 +629,6 @@ generate_fragment(struct llvmpipe_context *lp,
LLVMValueRef out_color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS];
LLVMValueRef depth_ptr_i;
if(i != 0)
lp_build_interp_soa_update(&interp, i);
depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &index, 1, "");
generate_fs(lp, shader, key,