i965: Make brw_compute_vue_map's userclip dependency a boolean.

Previously, brw_compute_vue_map required an argument indicating the
number of clip planes in use, but all it did with it was check if it
was nonzero.

This patch changes brw_compute_vue_map to take a boolean instead.
This allows us to avoid some unnecessary recompilation of the Gen4/5
GS and SF threads.

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Eric Anholt <eric@anholt.net>
This commit is contained in:
Paul Berry 2011-09-27 12:33:28 -07:00
parent 8f6920a7b6
commit 18e2e19b07
13 changed files with 32 additions and 25 deletions

View File

@ -69,7 +69,7 @@ static void compile_clip_prog( struct brw_context *brw,
c.func.single_program_flow = 1;
c.key = *key;
brw_compute_vue_map(&c.vue_map, intel, c.key.nr_userclip, c.key.attrs);
brw_compute_vue_map(&c.vue_map, intel, c.key.nr_userclip > 0, c.key.attrs);
/* nr_regs is the number of registers filled by reading data from the VUE.
* This program accesses the entire VUE, so nr_regs needs to be the size of

View File

@ -965,7 +965,8 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen);
/* brw_vs.c */
void brw_compute_vue_map(struct brw_vue_map *vue_map,
const struct intel_context *intel, int nr_userclip,
const struct intel_context *intel,
bool userclip_active,
GLbitfield64 outputs_written);
gl_clip_plane *brw_select_clip_planes(struct gl_context *ctx);

View File

@ -64,7 +64,7 @@ static void compile_gs_prog( struct brw_context *brw,
c.key = *key;
/* The geometry shader needs to access the entire VUE. */
struct brw_vue_map vue_map;
brw_compute_vue_map(&vue_map, intel, c.key.nr_userclip, c.key.attrs);
brw_compute_vue_map(&vue_map, intel, c.key.userclip_active, c.key.attrs);
c.nr_regs = (vue_map.num_slots + 1)/2;
mem_ctx = NULL;
@ -159,7 +159,7 @@ static void populate_key( struct brw_context *brw,
}
/* _NEW_TRANSFORM */
key->nr_userclip = _mesa_bitcount_64(ctx->Transform.ClipPlanesEnabled);
key->userclip_active = (ctx->Transform.ClipPlanesEnabled != 0);
key->need_gs_prog = (intel->gen >= 6)
? 0

View File

@ -44,8 +44,7 @@ struct brw_gs_prog_key {
GLuint primitive:4;
GLuint pv_first:1;
GLuint need_gs_prog:1;
GLuint nr_userclip:4;
GLuint pad:22;
GLuint userclip_active:1;
};
struct brw_gs_compile {

View File

@ -63,7 +63,7 @@ static void compile_sf_prog( struct brw_context *brw,
brw_init_compile(brw, &c.func, mem_ctx);
c.key = *key;
brw_compute_vue_map(&c.vue_map, intel, c.key.nr_userclip, c.key.attrs);
brw_compute_vue_map(&c.vue_map, intel, c.key.userclip_active, c.key.attrs);
c.urb_entry_read_offset = brw_sf_compute_urb_entry_read_offset(intel);
c.nr_attr_regs = (c.vue_map.num_slots + 1)/2 - c.urb_entry_read_offset;
c.nr_setup_regs = c.nr_attr_regs;
@ -154,7 +154,7 @@ static void upload_sf_prog(struct brw_context *brw)
}
/* _NEW_TRANSFORM */
key.nr_userclip = _mesa_bitcount_64(ctx->Transform.ClipPlanesEnabled);
key.userclip_active = (ctx->Transform.ClipPlanesEnabled != 0);
/* _NEW_POINT */
key.do_point_sprite = ctx->Point.PointSprite;

View File

@ -53,8 +53,7 @@ struct brw_sf_prog_key {
GLuint frontface_ccw:1;
GLuint do_point_sprite:1;
GLuint sprite_origin_lower_left:1;
GLuint nr_userclip:4;
GLuint pad:20;
GLuint userclip_active:1;
};
struct brw_sf_compile {

View File

@ -595,7 +595,7 @@ vec4_visitor::generate_vs_instruction(vec4_instruction *instruction,
bool
vec4_visitor::run()
{
if (c->key.nr_userclip && !c->key.uses_clip_distance)
if (c->key.userclip_active && !c->key.uses_clip_distance)
setup_uniform_clipplane_values();
/* Generate VS IR for main(). (the visitor only descends into

View File

@ -1790,7 +1790,7 @@ vec4_visitor::emit_psiz_and_flags(struct brw_reg reg)
{
if (intel->gen < 6 &&
((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) ||
c->key.nr_userclip || brw->has_negative_rhw_bug)) {
c->key.userclip_active || brw->has_negative_rhw_bug)) {
dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
dst_reg header1_w = header1;
header1_w.writemask = WRITEMASK_W;
@ -1996,7 +1996,7 @@ vec4_visitor::emit_urb_writes()
/* FINISHME: edgeflag */
brw_compute_vue_map(&c->vue_map, intel, c->key.nr_userclip,
brw_compute_vue_map(&c->vue_map, intel, c->key.userclip_active,
c->prog_data.outputs_written);
/* First mrf is the g0-based message header containing URB handles and such,

View File

@ -55,7 +55,8 @@ static inline void assign_vue_slot(struct brw_vue_map *vue_map,
*/
void
brw_compute_vue_map(struct brw_vue_map *vue_map,
const struct intel_context *intel, int nr_userclip,
const struct intel_context *intel,
bool userclip_active,
GLbitfield64 outputs_written)
{
int i;
@ -112,7 +113,7 @@ brw_compute_vue_map(struct brw_vue_map *vue_map,
*/
assign_vue_slot(vue_map, VERT_RESULT_PSIZ);
assign_vue_slot(vue_map, VERT_RESULT_HPOS);
if (nr_userclip) {
if (userclip_active) {
assign_vue_slot(vue_map, VERT_RESULT_CLIP_DIST0);
assign_vue_slot(vue_map, VERT_RESULT_CLIP_DIST1);
}
@ -287,6 +288,7 @@ static void brw_upload_vs_prog(struct brw_context *brw)
* the inputs it asks for, whether they are varying or not.
*/
key.program_string_id = vp->id;
key.userclip_active = (ctx->Transform.ClipPlanesEnabled != 0);
key.nr_userclip = _mesa_bitcount_64(ctx->Transform.ClipPlanesEnabled);
key.uses_clip_distance = vp->program.UsesClipDistance;
if (!key.uses_clip_distance)

View File

@ -46,6 +46,12 @@ struct brw_vs_prog_key {
*/
uint8_t gl_fixed_input_size[VERT_ATTRIB_MAX];
/**
* True if at least one clip flag is enabled, regardless of whether the
* shader uses clip planes or gl_ClipDistance.
*/
GLuint userclip_active:1;
/**
* Number of user clip planes (or clip distances) that are active.
*/

View File

@ -202,7 +202,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
/* User clip planes from curbe:
*/
if (c->key.nr_userclip) {
if (c->key.userclip_active) {
if (intel->gen >= 6) {
for (i = 0; i < c->key.nr_userclip; i++) {
c->userplane[i] = stride(brw_vec4_grf(reg + i / 2,
@ -325,7 +325,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
/* Allocate outputs. The non-position outputs go straight into message regs.
*/
brw_compute_vue_map(&c->vue_map, intel, c->key.nr_userclip,
brw_compute_vue_map(&c->vue_map, intel, c->key.userclip_active,
c->prog_data.outputs_written);
c->first_output = reg;
@ -1564,7 +1564,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
}
/* Set the user clip distances in dword 8-15. (m3-4)*/
if (c->key.nr_userclip) {
if (c->key.userclip_active) {
for (i = 0; i < c->key.nr_userclip; i++) {
struct brw_reg m;
if (i < 4)
@ -1577,7 +1577,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
}
} else if ((c->prog_data.outputs_written &
BITFIELD64_BIT(VERT_RESULT_PSIZ)) ||
c->key.nr_userclip || brw->has_negative_rhw_bug) {
c->key.userclip_active || brw->has_negative_rhw_bug) {
struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
GLuint i;
@ -1649,7 +1649,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
*/
brw_MOV(p, brw_message_reg(2), pos);
len_vertex_header = 1;
if (c->key.nr_userclip > 0)
if (c->key.userclip_active)
len_vertex_header += 2;
} else if (intel->gen == 5) {
/* There are 20 DWs (D0-D19) in VUE header on Ironlake:

View File

@ -126,12 +126,12 @@ upload_sf_state(struct brw_context *brw)
int urb_entry_read_offset = 1;
float point_size;
uint16_t attr_overrides[FRAG_ATTRIB_MAX];
int nr_userclip;
bool userclip_active;
/* _NEW_TRANSFORM */
nr_userclip = _mesa_bitcount_64(ctx->Transform.ClipPlanesEnabled);
userclip_active = (ctx->Transform.ClipPlanesEnabled != 0);
brw_compute_vue_map(&vue_map, intel, nr_userclip, vs_outputs_written);
brw_compute_vue_map(&vue_map, intel, userclip_active, vs_outputs_written);
urb_entry_read_length = (vue_map.num_slots + 1)/2 - urb_entry_read_offset;
if (urb_entry_read_length == 0) {
/* Setting the URB entry read length to 0 causes undefined behavior, so

View File

@ -44,10 +44,10 @@ upload_sbe_state(struct brw_context *brw)
int attr = 0, input_index = 0;
/* _NEW_TRANSFORM */
int urb_entry_read_offset = 1;
int nr_userclip = _mesa_bitcount_64(ctx->Transform.ClipPlanesEnabled);
bool userclip_active = (ctx->Transform.ClipPlanesEnabled != 0);
uint16_t attr_overrides[FRAG_ATTRIB_MAX];
brw_compute_vue_map(&vue_map, intel, nr_userclip, vs_outputs_written);
brw_compute_vue_map(&vue_map, intel, userclip_active, vs_outputs_written);
urb_entry_read_length = (vue_map.num_slots + 1)/2 - urb_entry_read_offset;
if (urb_entry_read_length == 0) {
/* Setting the URB entry read length to 0 causes undefined behavior, so