i965: Disable thread dispatch when the FS doesn't do any work.
This should reduce the cost of generating shadow maps, for example. No performance difference was measured in Nexuiz, though it does exercise the new dispatch-disabled path.
This commit is contained in:
parent
2595589f1d
commit
f30de69640
|
@ -33,6 +33,7 @@
|
|||
#ifndef BRW_WM_H
|
||||
#define BRW_WM_H
|
||||
|
||||
#include <stdbool.h>
|
||||
|
||||
#include "program/prog_instruction.h"
|
||||
#include "brw_context.h"
|
||||
|
@ -473,4 +474,6 @@ GLboolean brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog
|
|||
struct gl_shader *brw_new_shader(struct gl_context *ctx, GLuint name, GLuint type);
|
||||
struct gl_shader_program *brw_new_shader_program(struct gl_context *ctx, GLuint name);
|
||||
|
||||
bool brw_color_buffer_write_enabled(struct brw_context *brw);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -52,9 +52,36 @@ struct brw_wm_unit_key {
|
|||
unsigned int nr_surfaces, sampler_count;
|
||||
GLboolean uses_depth, computes_depth, uses_kill, is_glsl;
|
||||
GLboolean polygon_stipple, stats_wm, line_stipple, offset_enable;
|
||||
GLboolean color_write_enable;
|
||||
GLfloat offset_units, offset_factor;
|
||||
};
|
||||
|
||||
bool
|
||||
brw_color_buffer_write_enabled(struct brw_context *brw)
|
||||
{
|
||||
struct gl_context *ctx = &brw->intel.ctx;
|
||||
const struct gl_fragment_program *fp = brw->fragment_program;
|
||||
int i;
|
||||
|
||||
/* _NEW_BUFFERS */
|
||||
for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
|
||||
struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i];
|
||||
|
||||
/* _NEW_COLOR */
|
||||
if (rb &&
|
||||
(fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_COLOR) ||
|
||||
fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DATA0 + i)) &&
|
||||
(ctx->Color.ColorMask[i][0] ||
|
||||
ctx->Color.ColorMask[i][1] ||
|
||||
ctx->Color.ColorMask[i][2] ||
|
||||
ctx->Color.ColorMask[i][3])) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static void
|
||||
wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
|
||||
{
|
||||
|
@ -100,6 +127,9 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
|
|||
if (brw->state.depth_region == NULL)
|
||||
key->computes_depth = 0;
|
||||
|
||||
/* _NEW_BUFFERS | _NEW_COLOR */
|
||||
key->color_write_enable = brw_color_buffer_write_enabled(brw);
|
||||
|
||||
/* _NEW_COLOR */
|
||||
key->uses_kill = fp->UsesKill || ctx->Color.AlphaEnabled;
|
||||
key->is_glsl = bfp->isGLSL;
|
||||
|
@ -188,7 +218,13 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
|
|||
wm.wm5.enable_16_pix = 1;
|
||||
|
||||
wm.wm5.max_threads = brw->wm_max_threads - 1;
|
||||
wm.wm5.thread_dispatch_enable = 1; /* AKA: color_write */
|
||||
|
||||
if (key->color_write_enable ||
|
||||
key->uses_kill ||
|
||||
key->computes_depth) {
|
||||
wm.wm5.thread_dispatch_enable = 1;
|
||||
}
|
||||
|
||||
wm.wm5.legacy_line_rast = 0;
|
||||
wm.wm5.legacy_global_depth_bias = 0;
|
||||
wm.wm5.early_depth_test = 1; /* never need to disable */
|
||||
|
@ -293,7 +329,8 @@ const struct brw_tracked_state brw_wm_unit = {
|
|||
_NEW_POLYGONSTIPPLE |
|
||||
_NEW_LINE |
|
||||
_NEW_COLOR |
|
||||
_NEW_DEPTH),
|
||||
_NEW_DEPTH |
|
||||
_NEW_BUFFERS),
|
||||
|
||||
.brw = (BRW_NEW_FRAGMENT_PROGRAM |
|
||||
BRW_NEW_CURBE_OFFSETS |
|
||||
|
|
|
@ -29,6 +29,7 @@
|
|||
#include "brw_state.h"
|
||||
#include "brw_defines.h"
|
||||
#include "brw_util.h"
|
||||
#include "brw_wm.h"
|
||||
#include "program/prog_parameter.h"
|
||||
#include "program/prog_statevars.h"
|
||||
#include "intel_batchbuffer.h"
|
||||
|
@ -123,7 +124,6 @@ upload_wm_state(struct brw_context *brw)
|
|||
GEN6_WM_DISPATCH_START_GRF_SHIFT_0);
|
||||
|
||||
dw5 |= (40 - 1) << GEN6_WM_MAX_THREADS_SHIFT;
|
||||
dw5 |= GEN6_WM_DISPATCH_ENABLE;
|
||||
|
||||
/* BRW_NEW_FRAGMENT_PROGRAM */
|
||||
if (fp->isGLSL)
|
||||
|
@ -149,6 +149,11 @@ upload_wm_state(struct brw_context *brw)
|
|||
if (fp->program.UsesKill || ctx->Color.AlphaEnabled)
|
||||
dw5 |= GEN6_WM_KILL_ENABLE;
|
||||
|
||||
if (brw_color_buffer_write_enabled(brw) ||
|
||||
dw5 & (GEN6_WM_KILL_ENABLE | GEN6_WM_COMPUTED_DEPTH)) {
|
||||
dw5 |= GEN6_WM_DISPATCH_ENABLE;
|
||||
}
|
||||
|
||||
dw6 |= GEN6_WM_PERSPECTIVE_PIXEL_BARYCENTRIC;
|
||||
|
||||
dw6 |= brw_count_bits(brw->fragment_program->Base.InputsRead) <<
|
||||
|
@ -169,7 +174,7 @@ upload_wm_state(struct brw_context *brw)
|
|||
|
||||
const struct brw_tracked_state gen6_wm_state = {
|
||||
.dirty = {
|
||||
.mesa = (_NEW_LINE | _NEW_POLYGONSTIPPLE | _NEW_COLOR |
|
||||
.mesa = (_NEW_LINE | _NEW_POLYGONSTIPPLE | _NEW_COLOR | _NEW_BUFFERS |
|
||||
_NEW_PROGRAM_CONSTANTS),
|
||||
.brw = (BRW_NEW_CURBE_OFFSETS |
|
||||
BRW_NEW_FRAGMENT_PROGRAM |
|
||||
|
|
Loading…
Reference in New Issue