i965: Make brw_compute_vue_map's userclip dependency a boolean.
Previously, brw_compute_vue_map required an argument indicating the number of clip planes in use, but all it did with it was check if it was nonzero.

This patch changes brw_compute_vue_map to take a boolean instead. This allows us to avoid some unnecessary recompilation of the Gen4/5 GS and SF threads.

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Eric Anholt <eric@anholt.net>
This commit is contained in:
parent
8f6920a7b6
commit
18e2e19b07
|
@ -69,7 +69,7 @@ static void compile_clip_prog( struct brw_context *brw,
|
|||
c.func.single_program_flow = 1;
|
||||
|
||||
c.key = *key;
|
||||
brw_compute_vue_map(&c.vue_map, intel, c.key.nr_userclip, c.key.attrs);
|
||||
brw_compute_vue_map(&c.vue_map, intel, c.key.nr_userclip > 0, c.key.attrs);
|
||||
|
||||
/* nr_regs is the number of registers filled by reading data from the VUE.
|
||||
* This program accesses the entire VUE, so nr_regs needs to be the size of
|
||||
|
|
|
@ -965,7 +965,8 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen);
|
|||
|
||||
/* brw_vs.c */
|
||||
void brw_compute_vue_map(struct brw_vue_map *vue_map,
|
||||
const struct intel_context *intel, int nr_userclip,
|
||||
const struct intel_context *intel,
|
||||
bool userclip_active,
|
||||
GLbitfield64 outputs_written);
|
||||
gl_clip_plane *brw_select_clip_planes(struct gl_context *ctx);
|
||||
|
||||
|
|
|
@ -64,7 +64,7 @@ static void compile_gs_prog( struct brw_context *brw,
|
|||
c.key = *key;
|
||||
/* The geometry shader needs to access the entire VUE. */
|
||||
struct brw_vue_map vue_map;
|
||||
brw_compute_vue_map(&vue_map, intel, c.key.nr_userclip, c.key.attrs);
|
||||
brw_compute_vue_map(&vue_map, intel, c.key.userclip_active, c.key.attrs);
|
||||
c.nr_regs = (vue_map.num_slots + 1)/2;
|
||||
|
||||
mem_ctx = NULL;
|
||||
|
@ -159,7 +159,7 @@ static void populate_key( struct brw_context *brw,
|
|||
}
|
||||
|
||||
/* _NEW_TRANSFORM */
|
||||
key->nr_userclip = _mesa_bitcount_64(ctx->Transform.ClipPlanesEnabled);
|
||||
key->userclip_active = (ctx->Transform.ClipPlanesEnabled != 0);
|
||||
|
||||
key->need_gs_prog = (intel->gen >= 6)
|
||||
? 0
|
||||
|
|
|
@ -44,8 +44,7 @@ struct brw_gs_prog_key {
|
|||
GLuint primitive:4;
|
||||
GLuint pv_first:1;
|
||||
GLuint need_gs_prog:1;
|
||||
GLuint nr_userclip:4;
|
||||
GLuint pad:22;
|
||||
GLuint userclip_active:1;
|
||||
};
|
||||
|
||||
struct brw_gs_compile {
|
||||
|
|
|
@ -63,7 +63,7 @@ static void compile_sf_prog( struct brw_context *brw,
|
|||
brw_init_compile(brw, &c.func, mem_ctx);
|
||||
|
||||
c.key = *key;
|
||||
brw_compute_vue_map(&c.vue_map, intel, c.key.nr_userclip, c.key.attrs);
|
||||
brw_compute_vue_map(&c.vue_map, intel, c.key.userclip_active, c.key.attrs);
|
||||
c.urb_entry_read_offset = brw_sf_compute_urb_entry_read_offset(intel);
|
||||
c.nr_attr_regs = (c.vue_map.num_slots + 1)/2 - c.urb_entry_read_offset;
|
||||
c.nr_setup_regs = c.nr_attr_regs;
|
||||
|
@ -154,7 +154,7 @@ static void upload_sf_prog(struct brw_context *brw)
|
|||
}
|
||||
|
||||
/* _NEW_TRANSFORM */
|
||||
key.nr_userclip = _mesa_bitcount_64(ctx->Transform.ClipPlanesEnabled);
|
||||
key.userclip_active = (ctx->Transform.ClipPlanesEnabled != 0);
|
||||
|
||||
/* _NEW_POINT */
|
||||
key.do_point_sprite = ctx->Point.PointSprite;
|
||||
|
|
|
@ -53,8 +53,7 @@ struct brw_sf_prog_key {
|
|||
GLuint frontface_ccw:1;
|
||||
GLuint do_point_sprite:1;
|
||||
GLuint sprite_origin_lower_left:1;
|
||||
GLuint nr_userclip:4;
|
||||
GLuint pad:20;
|
||||
GLuint userclip_active:1;
|
||||
};
|
||||
|
||||
struct brw_sf_compile {
|
||||
|
|
|
@ -595,7 +595,7 @@ vec4_visitor::generate_vs_instruction(vec4_instruction *instruction,
|
|||
bool
|
||||
vec4_visitor::run()
|
||||
{
|
||||
if (c->key.nr_userclip && !c->key.uses_clip_distance)
|
||||
if (c->key.userclip_active && !c->key.uses_clip_distance)
|
||||
setup_uniform_clipplane_values();
|
||||
|
||||
/* Generate VS IR for main(). (the visitor only descends into
|
||||
|
|
|
@ -1790,7 +1790,7 @@ vec4_visitor::emit_psiz_and_flags(struct brw_reg reg)
|
|||
{
|
||||
if (intel->gen < 6 &&
|
||||
((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) ||
|
||||
c->key.nr_userclip || brw->has_negative_rhw_bug)) {
|
||||
c->key.userclip_active || brw->has_negative_rhw_bug)) {
|
||||
dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
|
||||
dst_reg header1_w = header1;
|
||||
header1_w.writemask = WRITEMASK_W;
|
||||
|
@ -1996,7 +1996,7 @@ vec4_visitor::emit_urb_writes()
|
|||
|
||||
/* FINISHME: edgeflag */
|
||||
|
||||
brw_compute_vue_map(&c->vue_map, intel, c->key.nr_userclip,
|
||||
brw_compute_vue_map(&c->vue_map, intel, c->key.userclip_active,
|
||||
c->prog_data.outputs_written);
|
||||
|
||||
/* First mrf is the g0-based message header containing URB handles and such,
|
||||
|
|
|
@ -55,7 +55,8 @@ static inline void assign_vue_slot(struct brw_vue_map *vue_map,
|
|||
*/
|
||||
void
|
||||
brw_compute_vue_map(struct brw_vue_map *vue_map,
|
||||
const struct intel_context *intel, int nr_userclip,
|
||||
const struct intel_context *intel,
|
||||
bool userclip_active,
|
||||
GLbitfield64 outputs_written)
|
||||
{
|
||||
int i;
|
||||
|
@ -112,7 +113,7 @@ brw_compute_vue_map(struct brw_vue_map *vue_map,
|
|||
*/
|
||||
assign_vue_slot(vue_map, VERT_RESULT_PSIZ);
|
||||
assign_vue_slot(vue_map, VERT_RESULT_HPOS);
|
||||
if (nr_userclip) {
|
||||
if (userclip_active) {
|
||||
assign_vue_slot(vue_map, VERT_RESULT_CLIP_DIST0);
|
||||
assign_vue_slot(vue_map, VERT_RESULT_CLIP_DIST1);
|
||||
}
|
||||
|
@ -287,6 +288,7 @@ static void brw_upload_vs_prog(struct brw_context *brw)
|
|||
* the inputs it asks for, whether they are varying or not.
|
||||
*/
|
||||
key.program_string_id = vp->id;
|
||||
key.userclip_active = (ctx->Transform.ClipPlanesEnabled != 0);
|
||||
key.nr_userclip = _mesa_bitcount_64(ctx->Transform.ClipPlanesEnabled);
|
||||
key.uses_clip_distance = vp->program.UsesClipDistance;
|
||||
if (!key.uses_clip_distance)
|
||||
|
|
|
@ -46,6 +46,12 @@ struct brw_vs_prog_key {
|
|||
*/
|
||||
uint8_t gl_fixed_input_size[VERT_ATTRIB_MAX];
|
||||
|
||||
/**
|
||||
* True if at least one clip flag is enabled, regardless of whether the
|
||||
* shader uses clip planes or gl_ClipDistance.
|
||||
*/
|
||||
GLuint userclip_active:1;
|
||||
|
||||
/**
|
||||
* Number of user clip planes (or clip distances) that are active.
|
||||
*/
|
||||
|
|
|
@ -202,7 +202,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
|
|||
|
||||
/* User clip planes from curbe:
|
||||
*/
|
||||
if (c->key.nr_userclip) {
|
||||
if (c->key.userclip_active) {
|
||||
if (intel->gen >= 6) {
|
||||
for (i = 0; i < c->key.nr_userclip; i++) {
|
||||
c->userplane[i] = stride(brw_vec4_grf(reg + i / 2,
|
||||
|
@ -325,7 +325,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
|
|||
|
||||
/* Allocate outputs. The non-position outputs go straight into message regs.
|
||||
*/
|
||||
brw_compute_vue_map(&c->vue_map, intel, c->key.nr_userclip,
|
||||
brw_compute_vue_map(&c->vue_map, intel, c->key.userclip_active,
|
||||
c->prog_data.outputs_written);
|
||||
c->first_output = reg;
|
||||
|
||||
|
@ -1564,7 +1564,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
|
|||
}
|
||||
|
||||
/* Set the user clip distances in dword 8-15. (m3-4)*/
|
||||
if (c->key.nr_userclip) {
|
||||
if (c->key.userclip_active) {
|
||||
for (i = 0; i < c->key.nr_userclip; i++) {
|
||||
struct brw_reg m;
|
||||
if (i < 4)
|
||||
|
@ -1577,7 +1577,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
|
|||
}
|
||||
} else if ((c->prog_data.outputs_written &
|
||||
BITFIELD64_BIT(VERT_RESULT_PSIZ)) ||
|
||||
c->key.nr_userclip || brw->has_negative_rhw_bug) {
|
||||
c->key.userclip_active || brw->has_negative_rhw_bug) {
|
||||
struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
|
||||
GLuint i;
|
||||
|
||||
|
@ -1649,7 +1649,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
|
|||
*/
|
||||
brw_MOV(p, brw_message_reg(2), pos);
|
||||
len_vertex_header = 1;
|
||||
if (c->key.nr_userclip > 0)
|
||||
if (c->key.userclip_active)
|
||||
len_vertex_header += 2;
|
||||
} else if (intel->gen == 5) {
|
||||
/* There are 20 DWs (D0-D19) in VUE header on Ironlake:
|
||||
|
|
|
@ -126,12 +126,12 @@ upload_sf_state(struct brw_context *brw)
|
|||
int urb_entry_read_offset = 1;
|
||||
float point_size;
|
||||
uint16_t attr_overrides[FRAG_ATTRIB_MAX];
|
||||
int nr_userclip;
|
||||
bool userclip_active;
|
||||
|
||||
/* _NEW_TRANSFORM */
|
||||
nr_userclip = _mesa_bitcount_64(ctx->Transform.ClipPlanesEnabled);
|
||||
userclip_active = (ctx->Transform.ClipPlanesEnabled != 0);
|
||||
|
||||
brw_compute_vue_map(&vue_map, intel, nr_userclip, vs_outputs_written);
|
||||
brw_compute_vue_map(&vue_map, intel, userclip_active, vs_outputs_written);
|
||||
urb_entry_read_length = (vue_map.num_slots + 1)/2 - urb_entry_read_offset;
|
||||
if (urb_entry_read_length == 0) {
|
||||
/* Setting the URB entry read length to 0 causes undefined behavior, so
|
||||
|
|
|
@ -44,10 +44,10 @@ upload_sbe_state(struct brw_context *brw)
|
|||
int attr = 0, input_index = 0;
|
||||
/* _NEW_TRANSFORM */
|
||||
int urb_entry_read_offset = 1;
|
||||
int nr_userclip = _mesa_bitcount_64(ctx->Transform.ClipPlanesEnabled);
|
||||
bool userclip_active = (ctx->Transform.ClipPlanesEnabled != 0);
|
||||
uint16_t attr_overrides[FRAG_ATTRIB_MAX];
|
||||
|
||||
brw_compute_vue_map(&vue_map, intel, nr_userclip, vs_outputs_written);
|
||||
brw_compute_vue_map(&vue_map, intel, userclip_active, vs_outputs_written);
|
||||
urb_entry_read_length = (vue_map.num_slots + 1)/2 - urb_entry_read_offset;
|
||||
if (urb_entry_read_length == 0) {
|
||||
/* Setting the URB entry read length to 0 causes undefined behavior, so
|
||||
|
|
Loading…
Reference in New Issue