i965: Generate vs code using scalar backend for BDW+
With everything in place, we can now use the scalar backend compiler for vertex shaders on BDW+. We make scalar vertex shaders the default on BDW+ but add a new vec4vs debug option to force the vec4 backend. No piglit regressions. Performance impact is minimal; I see a ~1.5% improvement on the T-Rex GLBenchmark case, but in general it's in the noise. Some of our internal synthetic, VS-bounded benchmarks show great improvement — 20%-40% in some cases — but real-world cases are mostly unaffected. Signed-off-by: Kristian Høgsberg <krh@bitplanet.net> Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
parent
7ff457b930
commit
ee5fb8d1ba
|
@ -559,6 +559,15 @@ brw_initialize_context_constants(struct brw_context *brw)
|
||||||
ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS = true;
|
ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS = true;
|
||||||
ctx->Const.ShaderCompilerOptions[MESA_SHADER_GEOMETRY].OptimizeForAOS = true;
|
ctx->Const.ShaderCompilerOptions[MESA_SHADER_GEOMETRY].OptimizeForAOS = true;
|
||||||
|
|
||||||
|
if (brw->scalar_vs) {
|
||||||
|
/* If we're using the scalar backend for vertex shaders, we need to
|
||||||
|
* configure these accordingly.
|
||||||
|
*/
|
||||||
|
ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitNoIndirectOutput = true;
|
||||||
|
ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitNoIndirectTemp = true;
|
||||||
|
ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS = false;
|
||||||
|
}
|
||||||
|
|
||||||
/* ARB_viewport_array */
|
/* ARB_viewport_array */
|
||||||
if (brw->gen >= 7 && ctx->API == API_OPENGL_CORE) {
|
if (brw->gen >= 7 && ctx->API == API_OPENGL_CORE) {
|
||||||
ctx->Const.MaxViewports = GEN7_NUM_VIEWPORTS;
|
ctx->Const.MaxViewports = GEN7_NUM_VIEWPORTS;
|
||||||
|
@ -754,6 +763,10 @@ brwCreateContext(gl_api api,
|
||||||
|
|
||||||
brw_process_driconf_options(brw);
|
brw_process_driconf_options(brw);
|
||||||
brw_process_intel_debug_variable(brw);
|
brw_process_intel_debug_variable(brw);
|
||||||
|
|
||||||
|
if (brw->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS))
|
||||||
|
brw->scalar_vs = true;
|
||||||
|
|
||||||
brw_initialize_context_constants(brw);
|
brw_initialize_context_constants(brw);
|
||||||
|
|
||||||
ctx->Const.ResetStrategy = notify_reset
|
ctx->Const.ResetStrategy = notify_reset
|
||||||
|
|
|
@ -1076,6 +1076,7 @@ struct brw_context
|
||||||
bool has_pln;
|
bool has_pln;
|
||||||
bool no_simd8;
|
bool no_simd8;
|
||||||
bool use_rep_send;
|
bool use_rep_send;
|
||||||
|
bool scalar_vs;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Some versions of Gen hardware don't do centroid interpolation correctly
|
* Some versions of Gen hardware don't do centroid interpolation correctly
|
||||||
|
|
|
@ -74,6 +74,19 @@ brw_shader_precompile(struct gl_context *ctx,
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline bool
|
||||||
|
is_scalar_shader_stage(struct brw_context *brw, int stage)
|
||||||
|
{
|
||||||
|
switch (stage) {
|
||||||
|
case MESA_SHADER_FRAGMENT:
|
||||||
|
return true;
|
||||||
|
case MESA_SHADER_VERTEX:
|
||||||
|
return brw->scalar_vs;
|
||||||
|
default:
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
brw_lower_packing_builtins(struct brw_context *brw,
|
brw_lower_packing_builtins(struct brw_context *brw,
|
||||||
gl_shader_stage shader_type,
|
gl_shader_stage shader_type,
|
||||||
|
@ -84,7 +97,7 @@ brw_lower_packing_builtins(struct brw_context *brw,
|
||||||
| LOWER_PACK_UNORM_2x16
|
| LOWER_PACK_UNORM_2x16
|
||||||
| LOWER_UNPACK_UNORM_2x16;
|
| LOWER_UNPACK_UNORM_2x16;
|
||||||
|
|
||||||
if (shader_type == MESA_SHADER_FRAGMENT) {
|
if (is_scalar_shader_stage(brw, shader_type)) {
|
||||||
ops |= LOWER_UNPACK_UNORM_4x8
|
ops |= LOWER_UNPACK_UNORM_4x8
|
||||||
| LOWER_UNPACK_SNORM_4x8
|
| LOWER_UNPACK_SNORM_4x8
|
||||||
| LOWER_PACK_UNORM_4x8
|
| LOWER_PACK_UNORM_4x8
|
||||||
|
@ -97,7 +110,7 @@ brw_lower_packing_builtins(struct brw_context *brw,
|
||||||
* lowering is needed. For SOA code, the Half2x16 ops must be
|
* lowering is needed. For SOA code, the Half2x16 ops must be
|
||||||
* scalarized.
|
* scalarized.
|
||||||
*/
|
*/
|
||||||
if (shader_type == MESA_SHADER_FRAGMENT) {
|
if (is_scalar_shader_stage(brw, shader_type)) {
|
||||||
ops |= LOWER_PACK_HALF_2x16_TO_SPLIT
|
ops |= LOWER_PACK_HALF_2x16_TO_SPLIT
|
||||||
| LOWER_UNPACK_HALF_2x16_TO_SPLIT;
|
| LOWER_UNPACK_HALF_2x16_TO_SPLIT;
|
||||||
}
|
}
|
||||||
|
@ -185,7 +198,7 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
|
||||||
do {
|
do {
|
||||||
progress = false;
|
progress = false;
|
||||||
|
|
||||||
if (stage == MESA_SHADER_FRAGMENT) {
|
if (is_scalar_shader_stage(brw, stage)) {
|
||||||
brw_do_channel_expressions(shader->base.ir);
|
brw_do_channel_expressions(shader->base.ir);
|
||||||
brw_do_vector_splitting(shader->base.ir);
|
brw_do_vector_splitting(shader->base.ir);
|
||||||
}
|
}
|
||||||
|
|
|
@ -22,6 +22,7 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "brw_vec4.h"
|
#include "brw_vec4.h"
|
||||||
|
#include "brw_fs.h"
|
||||||
#include "brw_cfg.h"
|
#include "brw_cfg.h"
|
||||||
#include "brw_vs.h"
|
#include "brw_vs.h"
|
||||||
#include "brw_dead_control_flow.h"
|
#include "brw_dead_control_flow.h"
|
||||||
|
@ -1767,6 +1768,7 @@ brw_vs_emit(struct brw_context *brw,
|
||||||
{
|
{
|
||||||
bool start_busy = false;
|
bool start_busy = false;
|
||||||
double start_time = 0;
|
double start_time = 0;
|
||||||
|
const unsigned *assembly = NULL;
|
||||||
|
|
||||||
if (unlikely(brw->perf_debug)) {
|
if (unlikely(brw->perf_debug)) {
|
||||||
start_busy = (brw->batch.last_bo &&
|
start_busy = (brw->batch.last_bo &&
|
||||||
|
@ -1781,6 +1783,37 @@ brw_vs_emit(struct brw_context *brw,
|
||||||
if (unlikely(INTEL_DEBUG & DEBUG_VS))
|
if (unlikely(INTEL_DEBUG & DEBUG_VS))
|
||||||
brw_dump_ir("vertex", prog, &shader->base, &c->vp->program.Base);
|
brw_dump_ir("vertex", prog, &shader->base, &c->vp->program.Base);
|
||||||
|
|
||||||
|
if (prog && brw->gen >= 8 && brw->scalar_vs) {
|
||||||
|
fs_visitor v(brw, mem_ctx, &c->key, prog_data, prog, &c->vp->program, 8);
|
||||||
|
if (!v.run_vs()) {
|
||||||
|
if (prog) {
|
||||||
|
prog->LinkStatus = false;
|
||||||
|
ralloc_strcat(&prog->InfoLog, v.fail_msg);
|
||||||
|
}
|
||||||
|
|
||||||
|
_mesa_problem(NULL, "Failed to compile vertex shader: %s\n",
|
||||||
|
v.fail_msg);
|
||||||
|
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
fs_generator g(brw, mem_ctx, (void *) &c->key, &prog_data->base.base,
|
||||||
|
&c->vp->program.Base, v.runtime_check_aads_emit);
|
||||||
|
if (INTEL_DEBUG & DEBUG_VS) {
|
||||||
|
char *name = ralloc_asprintf(mem_ctx, "%s vertex shader %d",
|
||||||
|
prog->Label ? prog->Label : "unnamed",
|
||||||
|
prog->Name);
|
||||||
|
g.enable_debug(name);
|
||||||
|
}
|
||||||
|
g.generate_code(v.cfg, 8);
|
||||||
|
assembly = g.get_assembly(final_assembly_size);
|
||||||
|
|
||||||
|
if (assembly)
|
||||||
|
prog_data->base.simd8 = true;
|
||||||
|
c->base.last_scratch = v.last_scratch;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!assembly) {
|
||||||
vec4_vs_visitor v(brw, c, prog_data, prog, mem_ctx);
|
vec4_vs_visitor v(brw, c, prog_data, prog, mem_ctx);
|
||||||
if (!v.run()) {
|
if (!v.run()) {
|
||||||
if (prog) {
|
if (prog) {
|
||||||
|
@ -1794,10 +1827,10 @@ brw_vs_emit(struct brw_context *brw,
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
const unsigned *assembly = NULL;
|
|
||||||
vec4_generator g(brw, prog, &c->vp->program.Base, &prog_data->base,
|
vec4_generator g(brw, prog, &c->vp->program.Base, &prog_data->base,
|
||||||
mem_ctx, INTEL_DEBUG & DEBUG_VS);
|
mem_ctx, INTEL_DEBUG & DEBUG_VS);
|
||||||
assembly = g.generate_assembly(v.cfg, final_assembly_size);
|
assembly = g.generate_assembly(v.cfg, final_assembly_size);
|
||||||
|
}
|
||||||
|
|
||||||
if (unlikely(brw->perf_debug) && shader) {
|
if (unlikely(brw->perf_debug) && shader) {
|
||||||
if (shader->compiled_once) {
|
if (shader->compiled_once) {
|
||||||
|
|
|
@ -68,6 +68,7 @@ static const struct dri_debug_control debug_control[] = {
|
||||||
{ "optimizer", DEBUG_OPTIMIZER },
|
{ "optimizer", DEBUG_OPTIMIZER },
|
||||||
{ "ann", DEBUG_ANNOTATION },
|
{ "ann", DEBUG_ANNOTATION },
|
||||||
{ "no8", DEBUG_NO8 },
|
{ "no8", DEBUG_NO8 },
|
||||||
|
{ "vec4vs", DEBUG_VEC4VS },
|
||||||
{ NULL, 0 }
|
{ NULL, 0 }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -63,6 +63,7 @@ extern uint64_t INTEL_DEBUG;
|
||||||
#define DEBUG_OPTIMIZER (1 << 27)
|
#define DEBUG_OPTIMIZER (1 << 27)
|
||||||
#define DEBUG_ANNOTATION (1 << 28)
|
#define DEBUG_ANNOTATION (1 << 28)
|
||||||
#define DEBUG_NO8 (1 << 29)
|
#define DEBUG_NO8 (1 << 29)
|
||||||
|
#define DEBUG_VEC4VS (1 << 30)
|
||||||
|
|
||||||
#ifdef HAVE_ANDROID_PLATFORM
|
#ifdef HAVE_ANDROID_PLATFORM
|
||||||
#define LOG_TAG "INTEL-MESA"
|
#define LOG_TAG "INTEL-MESA"
|
||||||
|
|
Loading…
Reference in New Issue