From 5b7eb868fde98388d80601d8dea39e679828f42f Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sat, 9 Oct 2010 11:28:00 +0100 Subject: [PATCH] llvmpipe: clean up shader pre/postamble, try to catch more early-z Specifically, can do early-depth-test even when alphatest or kill-pixel are active, providing we defer the actual z write until the final mask is available. Improves demos/fire.c especially in the case where you get close to the trees. --- src/gallium/drivers/llvmpipe/lp_bld_depth.c | 40 +++- src/gallium/drivers/llvmpipe/lp_bld_depth.h | 15 +- src/gallium/drivers/llvmpipe/lp_state_fs.c | 241 ++++++++++++-------- 3 files changed, 193 insertions(+), 103 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c index 6b8ffb6ca26..8d9be2ebbbf 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c @@ -410,7 +410,7 @@ get_s_shift_and_mask(const struct util_format_description *format_desc, * \param maskvalue is the depth test mask. * \param counter is a pointer of the uint32 counter. */ -static void +void lp_build_occlusion_count(LLVMBuilderRef builder, struct lp_type type, LLVMValueRef maskvalue, @@ -462,7 +462,7 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, LLVMValueRef z_src, LLVMValueRef zs_dst_ptr, LLVMValueRef face, - LLVMValueRef counter, + LLVMValueRef *zs_value, boolean do_branch) { struct lp_type type; @@ -524,17 +524,14 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, * storage. 
*/ if (depth->writemask) { - type.sign = 0; + type.sign = 1; lp_build_context_init(&bld, builder, type); z_dst = lp_build_select(&bld, mask->value, z_src, z_dst); z_dst = LLVMBuildShl(builder, z_dst, const_8_int, "z_dst"); - LLVMBuildStore(builder, z_dst, zs_dst_ptr); + *zs_value = z_dst; } - if (counter) - lp_build_occlusion_count(builder, type, mask->value, counter); - return; } @@ -779,7 +776,7 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, else zs_dst = stencil_vals; - LLVMBuildStore(builder, zs_dst, zs_dst_ptr); + *zs_value = zs_dst; } if (s_pass_mask) @@ -791,6 +788,29 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, if (do_branch) lp_build_mask_check(mask); - if (counter) - lp_build_occlusion_count(builder, type, mask->value, counter); +} + + + +void +lp_build_deferred_depth_write(LLVMBuilderRef builder, + struct lp_type z_src_type, + const struct util_format_description *format_desc, + struct lp_build_mask_context *mask, + LLVMValueRef zs_dst_ptr, + LLVMValueRef zs_value) +{ + struct lp_type type; + struct lp_build_context bld; + LLVMValueRef z_dst; + + /* XXX: pointlessly redo type logic: + */ + type = lp_depth_type(format_desc, z_src_type.width*z_src_type.length); + lp_build_context_init(&bld, builder, type); + + z_dst = LLVMBuildLoad(builder, zs_dst_ptr, "zsbufval"); + z_dst = lp_build_select(&bld, mask->value, zs_value, z_dst); + + LLVMBuildStore(builder, z_dst, zs_dst_ptr); } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.h b/src/gallium/drivers/llvmpipe/lp_bld_depth.h index 2a63bb9378b..0f89668123a 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.h @@ -61,8 +61,21 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, LLVMValueRef zs_src, LLVMValueRef zs_dst_ptr, LLVMValueRef facing, - LLVMValueRef counter, + LLVMValueRef *zs_value, boolean do_branch); +void +lp_build_deferred_depth_write(LLVMBuilderRef builder, + struct lp_type z_src_type, + const struct 
util_format_description *format_desc, + struct lp_build_mask_context *mask, + LLVMValueRef zs_dst_ptr, + LLVMValueRef zs_value); + +void +lp_build_occlusion_count(LLVMBuilderRef builder, + struct lp_type type, + LLVMValueRef maskvalue, + LLVMValueRef counter); #endif /* !LP_BLD_DEPTH_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index df5dd83c875..f45f36f6332 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -104,43 +104,6 @@ static unsigned fs_no = 0; -/** - * Generate the depth /stencil test code. - */ -static void -generate_depth_stencil(LLVMBuilderRef builder, - const struct lp_fragment_shader_variant_key *key, - struct lp_type src_type, - struct lp_build_mask_context *mask, - LLVMValueRef stencil_refs[2], - LLVMValueRef src, - LLVMValueRef dst_ptr, - LLVMValueRef facing, - LLVMValueRef counter, - boolean do_branch) -{ - const struct util_format_description *format_desc; - - if (!key->depth.enabled && !key->stencil[0].enabled && !key->stencil[1].enabled) - return; - - format_desc = util_format_description(key->zsbuf_format); - assert(format_desc); - - lp_build_depth_stencil_test(builder, - &key->depth, - key->stencil, - src_type, - format_desc, - mask, - stencil_refs, - src, - dst_ptr, - facing, - counter, - do_branch); -} - /** * Expand the relevent bits of mask_input to a 4-dword mask for the @@ -222,6 +185,26 @@ generate_quad_mask(LLVMBuilderRef builder, } +#define EARLY_DEPTH_TEST 0x1 +#define LATE_DEPTH_TEST 0x2 +#define EARLY_DEPTH_WRITE 0x4 +#define LATE_DEPTH_WRITE 0x8 + +static int +find_output_by_semantic( const struct tgsi_shader_info *info, + unsigned semantic, + unsigned index ) +{ + int i; + + for (i = 0; i < info->num_outputs; i++) + if (info->output_semantic_name[i] == semantic && + info->output_semantic_index[i] == index) + return i; + + return -1; +} + /** * Generate the fragment shader, depth/stencil test, and alpha tests. 
@@ -246,21 +229,53 @@ generate_fs(struct llvmpipe_context *lp, LLVMValueRef mask_input, LLVMValueRef counter) { + const struct util_format_description *zs_format_desc = NULL; const struct tgsi_token *tokens = shader->base.tokens; LLVMTypeRef vec_type; LLVMValueRef consts_ptr; LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS]; LLVMValueRef z; + LLVMValueRef zs_value = NULL; LLVMValueRef stencil_refs[2]; struct lp_build_flow_context *flow; struct lp_build_mask_context mask; - boolean early_depth_stencil_test; boolean simple_shader = (shader->info.file_count[TGSI_FILE_SAMPLER] == 0 && shader->info.num_inputs < 3 && shader->info.num_instructions < 8); unsigned attrib; unsigned chan; unsigned cbuf; + unsigned depth_mode; + + if (key->depth.enabled || + key->stencil[0].enabled || + key->stencil[1].enabled) { + + zs_format_desc = util_format_description(key->zsbuf_format); + assert(zs_format_desc); + + if (!shader->info.writes_z) { + if (key->alpha.enabled || shader->info.uses_kill) + /* With alpha test and kill, can do the depth test early + * and hopefully eliminate some quads. But need to do a + * special deferred depth write once the final mask value + * is known. 
+ */ + depth_mode = EARLY_DEPTH_TEST | LATE_DEPTH_WRITE; + else + depth_mode = EARLY_DEPTH_TEST | EARLY_DEPTH_WRITE; + } + else { + depth_mode = LATE_DEPTH_TEST | LATE_DEPTH_WRITE; + } + + if (!(key->depth.enabled && key->depth.writemask) && + !(key->stencil[0].enabled && key->stencil[0].writemask)) + depth_mode &= ~(LATE_DEPTH_WRITE | EARLY_DEPTH_WRITE); + } + else { + depth_mode = 0; + } assert(i < 4); @@ -293,79 +308,121 @@ generate_fs(struct llvmpipe_context *lp, *pmask = lp_build_const_int_vec(type, ~0); } - - early_depth_stencil_test = - (key->depth.enabled || key->stencil[0].enabled) && - !key->alpha.enabled && - !shader->info.uses_kill && - !shader->info.writes_z; - /* 'mask' will control execution based on quad's pixel alive/killed state */ lp_build_mask_begin(&mask, flow, type, *pmask); - if (!early_depth_stencil_test && !simple_shader) + if (!(depth_mode & EARLY_DEPTH_TEST) && !simple_shader) lp_build_mask_check(&mask); lp_build_interp_soa_update_pos(interp, i); z = interp->pos[2]; - if (early_depth_stencil_test) - generate_depth_stencil(builder, key, - type, &mask, - stencil_refs, - z, depth_ptr, - facing, counter, - !simple_shader); + if (depth_mode & EARLY_DEPTH_TEST) { + lp_build_depth_stencil_test(builder, + &key->depth, + key->stencil, + type, + zs_format_desc, + &mask, + stencil_refs, + z, + depth_ptr, facing, + &zs_value, + !simple_shader); + + if (depth_mode & EARLY_DEPTH_WRITE) + LLVMBuildStore(builder, zs_value, depth_ptr); + } lp_build_interp_soa_update_inputs(interp, i); - + + /* Build the actual shader */ lp_build_tgsi_soa(builder, tokens, type, &mask, consts_ptr, interp->pos, interp->inputs, outputs, sampler, &shader->info); - /* loop over fragment shader outputs/results */ - for (attrib = 0; attrib < shader->info.num_outputs; ++attrib) { - for(chan = 0; chan < NUM_CHANNELS; ++chan) { - if(outputs[attrib][chan]) { + + /* Alpha test */ + if (key->alpha.enabled) { + int color0 = find_output_by_semantic(&shader->info, + TGSI_SEMANTIC_COLOR, + 
0); + + if (color0 != -1) { + LLVMValueRef alpha = LLVMBuildLoad(builder, outputs[color0][3], "alpha"); + LLVMValueRef alpha_ref_value; + + alpha_ref_value = lp_jit_context_alpha_ref_value(builder, context_ptr); + alpha_ref_value = lp_build_broadcast(builder, vec_type, alpha_ref_value); + + lp_build_alpha_test(builder, key->alpha.func, type, + &mask, alpha, alpha_ref_value, + (depth_mode & LATE_DEPTH_TEST) != 0); + } + } + + /* Late Z test */ + if (depth_mode & LATE_DEPTH_TEST) { + int pos0 = find_output_by_semantic(&shader->info, + TGSI_SEMANTIC_POSITION, + 0); + + if (pos0 != -1) { + z = LLVMBuildLoad(builder, outputs[pos0][2], "z"); + lp_build_name(z, "output%u.%u.%c", i, pos0, "xyzw"[chan]); + } + + lp_build_depth_stencil_test(builder, + &key->depth, + key->stencil, + type, + zs_format_desc, + &mask, + stencil_refs, + z, + depth_ptr, facing, + &zs_value, + !simple_shader); + /* Late Z write */ + if (depth_mode & LATE_DEPTH_WRITE) + LLVMBuildStore(builder, zs_value, depth_ptr); + } + else if ((depth_mode & EARLY_DEPTH_TEST) && + (depth_mode & LATE_DEPTH_WRITE)) + { + /* Need to apply a reduced mask to the depth write. Reload the + * depth value, update from zs_value with the new mask value and + * write that out. + */ + lp_build_deferred_depth_write(builder, + type, + zs_format_desc, + &mask, + depth_ptr, + zs_value); + } + + + /* Color write */ + for (attrib = 0; attrib < shader->info.num_outputs; ++attrib) + { + if (shader->info.output_semantic_name[attrib] == TGSI_SEMANTIC_COLOR) + { + unsigned cbuf = shader->info.output_semantic_index[attrib]; + for(chan = 0; chan < NUM_CHANNELS; ++chan) + { + /* XXX: just initialize outputs to point at colors[] and + * skip this. 
+ */ LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], ""); - lp_build_name(out, "output%u.%u.%c", i, attrib, "xyzw"[chan]); - - switch (shader->info.output_semantic_name[attrib]) { - case TGSI_SEMANTIC_COLOR: - { - unsigned cbuf = shader->info.output_semantic_index[attrib]; - - lp_build_name(out, "color%u.%u.%c", i, attrib, "rgba"[chan]); - - /* Alpha test */ - /* XXX: should only test the final assignment to alpha */ - if (cbuf == 0 && chan == 3 && key->alpha.enabled) { - LLVMValueRef alpha = out; - LLVMValueRef alpha_ref_value; - alpha_ref_value = lp_jit_context_alpha_ref_value(builder, context_ptr); - alpha_ref_value = lp_build_broadcast(builder, vec_type, alpha_ref_value); - lp_build_alpha_test(builder, key->alpha.func, type, - &mask, alpha, alpha_ref_value, FALSE); - } - - LLVMBuildStore(builder, out, color[cbuf][chan]); - break; - } - - case TGSI_SEMANTIC_POSITION: - if(chan == 2) - z = out; - break; - } + lp_build_name(out, "color%u.%u.%c", i, attrib, "rgba"[chan]); + LLVMBuildStore(builder, out, color[cbuf][chan]); } } } - if (!early_depth_stencil_test) - generate_depth_stencil(builder, key, - type, &mask, - stencil_refs, z, depth_ptr, - facing, counter, FALSE); + if (counter) + lp_build_occlusion_count(builder, type, mask.value, counter); lp_build_mask_end(&mask);