From 18e2e19b07b312c978dfbb6d336f69fa84b3ffe2 Mon Sep 17 00:00:00 2001 From: Paul Berry Date: Tue, 27 Sep 2011 12:33:28 -0700 Subject: [PATCH] i965: Make brw_compute_vue_map's userclip dependency a boolean. Previously, brw_compute_vue_map required an argument indicating the number of clip planes in use, but all it did with it was check if it was nonzero. This patch changes brw_compute_vue_map to take a boolean instead. This allows us to avoid some unnecessary recompilation of the Gen4/5 GS and SF threads. Reviewed-by: Ian Romanick Reviewed-by: Eric Anholt --- src/mesa/drivers/dri/i965/brw_clip.c | 2 +- src/mesa/drivers/dri/i965/brw_context.h | 3 ++- src/mesa/drivers/dri/i965/brw_gs.c | 4 ++-- src/mesa/drivers/dri/i965/brw_gs.h | 3 +-- src/mesa/drivers/dri/i965/brw_sf.c | 4 ++-- src/mesa/drivers/dri/i965/brw_sf.h | 3 +-- src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 2 +- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 4 ++-- src/mesa/drivers/dri/i965/brw_vs.c | 6 ++++-- src/mesa/drivers/dri/i965/brw_vs.h | 6 ++++++ src/mesa/drivers/dri/i965/brw_vs_emit.c | 10 +++++----- src/mesa/drivers/dri/i965/gen6_sf_state.c | 6 +++--- src/mesa/drivers/dri/i965/gen7_sf_state.c | 4 ++-- 13 files changed, 32 insertions(+), 25 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_clip.c b/src/mesa/drivers/dri/i965/brw_clip.c index 2dcf9e5df2d..ec851b743f4 100644 --- a/src/mesa/drivers/dri/i965/brw_clip.c +++ b/src/mesa/drivers/dri/i965/brw_clip.c @@ -69,7 +69,7 @@ static void compile_clip_prog( struct brw_context *brw, c.func.single_program_flow = 1; c.key = *key; - brw_compute_vue_map(&c.vue_map, intel, c.key.nr_userclip, c.key.attrs); + brw_compute_vue_map(&c.vue_map, intel, c.key.nr_userclip > 0, c.key.attrs); /* nr_regs is the number of registers filled by reading data from the VUE. * This program accesses the entire VUE, so nr_regs needs to be the size of diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index e26437fa448..b335954e9a9 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -965,7 +965,8 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen); /* brw_vs.c */ void brw_compute_vue_map(struct brw_vue_map *vue_map, - const struct intel_context *intel, int nr_userclip, + const struct intel_context *intel, + bool userclip_active, GLbitfield64 outputs_written); gl_clip_plane *brw_select_clip_planes(struct gl_context *ctx); diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index b7304b97134..3b56aae821b 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -64,7 +64,7 @@ static void compile_gs_prog( struct brw_context *brw, c.key = *key; /* The geometry shader needs to access the entire VUE. */ struct brw_vue_map vue_map; - brw_compute_vue_map(&vue_map, intel, c.key.nr_userclip, c.key.attrs); + brw_compute_vue_map(&vue_map, intel, c.key.userclip_active, c.key.attrs); c.nr_regs = (vue_map.num_slots + 1)/2; mem_ctx = NULL; @@ -159,7 +159,7 @@ static void populate_key( struct brw_context *brw, } /* _NEW_TRANSFORM */ - key->nr_userclip = _mesa_bitcount_64(ctx->Transform.ClipPlanesEnabled); + key->userclip_active = (ctx->Transform.ClipPlanesEnabled != 0); key->need_gs_prog = (intel->gen >= 6) ? 0 diff --git a/src/mesa/drivers/dri/i965/brw_gs.h b/src/mesa/drivers/dri/i965/brw_gs.h index d8637c8bb25..cee7467628b 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.h +++ b/src/mesa/drivers/dri/i965/brw_gs.h @@ -44,8 +44,7 @@ struct brw_gs_prog_key { GLuint primitive:4; GLuint pv_first:1; GLuint need_gs_prog:1; - GLuint nr_userclip:4; - GLuint pad:22; + GLuint userclip_active:1; }; struct brw_gs_compile { diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c index f4a6f54d1e3..7a0cd924ff1 100644 --- a/src/mesa/drivers/dri/i965/brw_sf.c +++ b/src/mesa/drivers/dri/i965/brw_sf.c @@ -63,7 +63,7 @@ static void compile_sf_prog( struct brw_context *brw, brw_init_compile(brw, &c.func, mem_ctx); c.key = *key; - brw_compute_vue_map(&c.vue_map, intel, c.key.nr_userclip, c.key.attrs); + brw_compute_vue_map(&c.vue_map, intel, c.key.userclip_active, c.key.attrs); c.urb_entry_read_offset = brw_sf_compute_urb_entry_read_offset(intel); c.nr_attr_regs = (c.vue_map.num_slots + 1)/2 - c.urb_entry_read_offset; c.nr_setup_regs = c.nr_attr_regs; @@ -154,7 +154,7 @@ static void upload_sf_prog(struct brw_context *brw) } /* _NEW_TRANSFORM */ - key.nr_userclip = _mesa_bitcount_64(ctx->Transform.ClipPlanesEnabled); + key.userclip_active = (ctx->Transform.ClipPlanesEnabled != 0); /* _NEW_POINT */ key.do_point_sprite = ctx->Point.PointSprite; diff --git a/src/mesa/drivers/dri/i965/brw_sf.h b/src/mesa/drivers/dri/i965/brw_sf.h index f39ad27154d..8de5f5a3489 100644 --- a/src/mesa/drivers/dri/i965/brw_sf.h +++ b/src/mesa/drivers/dri/i965/brw_sf.h @@ -53,8 +53,7 @@ struct brw_sf_prog_key { GLuint frontface_ccw:1; GLuint do_point_sprite:1; GLuint sprite_origin_lower_left:1; - GLuint nr_userclip:4; - GLuint pad:20; + GLuint userclip_active:1; }; struct brw_sf_compile { diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index 5cbbda818cd..ccbad2527f8 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -595,7 +595,7 @@ vec4_visitor::generate_vs_instruction(vec4_instruction *instruction, bool vec4_visitor::run() { - if (c->key.nr_userclip && !c->key.uses_clip_distance) + if (c->key.userclip_active && !c->key.uses_clip_distance) setup_uniform_clipplane_values(); /* Generate VS IR for main(). (the visitor only descends into diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index f99dc51682b..71053060cbc 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1790,7 +1790,7 @@ vec4_visitor::emit_psiz_and_flags(struct brw_reg reg) { if (intel->gen < 6 && ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) || - c->key.nr_userclip || brw->has_negative_rhw_bug)) { + c->key.userclip_active || brw->has_negative_rhw_bug)) { dst_reg header1 = dst_reg(this, glsl_type::uvec4_type); dst_reg header1_w = header1; header1_w.writemask = WRITEMASK_W; @@ -1996,7 +1996,7 @@ vec4_visitor::emit_urb_writes() /* FINISHME: edgeflag */ - brw_compute_vue_map(&c->vue_map, intel, c->key.nr_userclip, + brw_compute_vue_map(&c->vue_map, intel, c->key.userclip_active, c->prog_data.outputs_written); /* First mrf is the g0-based message header containing URB handles and such, diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index eb673e29c96..e01752cdb71 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -55,7 +55,8 @@ static inline void assign_vue_slot(struct brw_vue_map *vue_map, */ void brw_compute_vue_map(struct brw_vue_map *vue_map, - const struct intel_context *intel, int nr_userclip, + const struct intel_context *intel, + bool userclip_active, GLbitfield64 outputs_written) { int i; @@ -112,7 +113,7 @@ brw_compute_vue_map(struct brw_vue_map *vue_map, */ assign_vue_slot(vue_map, VERT_RESULT_PSIZ); assign_vue_slot(vue_map, VERT_RESULT_HPOS); - if (nr_userclip) { + if (userclip_active) { assign_vue_slot(vue_map, VERT_RESULT_CLIP_DIST0); assign_vue_slot(vue_map, VERT_RESULT_CLIP_DIST1); } @@ -287,6 +288,7 @@ static void brw_upload_vs_prog(struct brw_context *brw) * the inputs it asks for, whether they are varying or not. */ key.program_string_id = vp->id; + key.userclip_active = (ctx->Transform.ClipPlanesEnabled != 0); key.nr_userclip = _mesa_bitcount_64(ctx->Transform.ClipPlanesEnabled); key.uses_clip_distance = vp->program.UsesClipDistance; if (!key.uses_clip_distance) diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h index a12b1390aad..bd81764cb99 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.h +++ b/src/mesa/drivers/dri/i965/brw_vs.h @@ -46,6 +46,12 @@ struct brw_vs_prog_key { */ uint8_t gl_fixed_input_size[VERT_ATTRIB_MAX]; + /** + * True if at least one clip flag is enabled, regardless of whether the + * shader uses clip planes or gl_ClipDistance. + */ + GLuint userclip_active:1; + /** * Number of user clip planes (or clip distances) that are active. */ diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index 4f7a4422d76..e78cb6c60d4 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -202,7 +202,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) /* User clip planes from curbe: */ - if (c->key.nr_userclip) { + if (c->key.userclip_active) { if (intel->gen >= 6) { for (i = 0; i < c->key.nr_userclip; i++) { c->userplane[i] = stride(brw_vec4_grf(reg + i / 2, @@ -325,7 +325,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) /* Allocate outputs. The non-position outputs go straight into message regs. */ - brw_compute_vue_map(&c->vue_map, intel, c->key.nr_userclip, + brw_compute_vue_map(&c->vue_map, intel, c->key.userclip_active, c->prog_data.outputs_written); c->first_output = reg; @@ -1564,7 +1564,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) } /* Set the user clip distances in dword 8-15. (m3-4)*/ - if (c->key.nr_userclip) { + if (c->key.userclip_active) { for (i = 0; i < c->key.nr_userclip; i++) { struct brw_reg m; if (i < 4) @@ -1577,7 +1577,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) } } else if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) || - c->key.nr_userclip || brw->has_negative_rhw_bug) { + c->key.userclip_active || brw->has_negative_rhw_bug) { struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); GLuint i; @@ -1649,7 +1649,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) */ brw_MOV(p, brw_message_reg(2), pos); len_vertex_header = 1; - if (c->key.nr_userclip > 0) + if (c->key.userclip_active) len_vertex_header += 2; } else if (intel->gen == 5) { /* There are 20 DWs (D0-D19) in VUE header on Ironlake: diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c index 306a6c6f2ec..d65bfda6c00 100644 --- a/src/mesa/drivers/dri/i965/gen6_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c @@ -126,12 +126,12 @@ upload_sf_state(struct brw_context *brw) int urb_entry_read_offset = 1; float point_size; uint16_t attr_overrides[FRAG_ATTRIB_MAX]; - int nr_userclip; + bool userclip_active; /* _NEW_TRANSFORM */ - nr_userclip = _mesa_bitcount_64(ctx->Transform.ClipPlanesEnabled); + userclip_active = (ctx->Transform.ClipPlanesEnabled != 0); - brw_compute_vue_map(&vue_map, intel, nr_userclip, vs_outputs_written); + brw_compute_vue_map(&vue_map, intel, userclip_active, vs_outputs_written); urb_entry_read_length = (vue_map.num_slots + 1)/2 - urb_entry_read_offset; if (urb_entry_read_length == 0) { /* Setting the URB entry read length to 0 causes undefined behavior, so diff --git a/src/mesa/drivers/dri/i965/gen7_sf_state.c b/src/mesa/drivers/dri/i965/gen7_sf_state.c index c40abe4d4bc..e796a01b74d 100644 --- a/src/mesa/drivers/dri/i965/gen7_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen7_sf_state.c @@ -44,10 +44,10 @@ upload_sbe_state(struct brw_context *brw) int attr = 0, input_index = 0; /* _NEW_TRANSFORM */ int urb_entry_read_offset = 1; - int nr_userclip = _mesa_bitcount_64(ctx->Transform.ClipPlanesEnabled); + bool userclip_active = (ctx->Transform.ClipPlanesEnabled != 0); uint16_t attr_overrides[FRAG_ATTRIB_MAX]; - brw_compute_vue_map(&vue_map, intel, nr_userclip, vs_outputs_written); + brw_compute_vue_map(&vue_map, intel, userclip_active, vs_outputs_written); urb_entry_read_length = (vue_map.num_slots + 1)/2 - urb_entry_read_offset; if (urb_entry_read_length == 0) { /* Setting the URB entry read length to 0 causes undefined behavior, so