r300g: add a new debug option which disables compiler optimizations

Those are:
- dead-code elimination
- constant folding
- peephole (mainly copy propagation)
- register allocation

There are some bugs which I need to track down.

Also fix up the descriptions of all the debug options.
This commit is contained in:
Marek Olšák 2010-09-01 08:12:51 +02:00
parent d2f4ceaa47
commit cfc461fca6
10 changed files with 35 additions and 24 deletions

View File

@ -27,24 +27,25 @@
#include <stdio.h>
static const struct debug_named_value debug_options[] = {
{ "fp", DBG_FP, "Fragment program handling (for debugging)" },
{ "vp", DBG_VP, "Vertex program handling (for debugging)" },
{ "draw", DBG_DRAW, "Draw calls (for debugging)" },
{ "swtcl", DBG_SWTCL, "SWTCL-specific info (for debugging)" },
{ "rsblock", DBG_RS_BLOCK, "Rasterizer registers (for debugging)" },
{ "psc", DBG_PSC, "Vertex stream registers (for debugging)" },
{ "tex", DBG_TEX, "Textures (for debugging)" },
{ "texalloc", DBG_TEXALLOC, "Texture allocation (for debugging)" },
{ "fall", DBG_FALL, "Fallbacks (for debugging)" },
{ "rs", DBG_RS, "Rasterizer (for debugging)" },
{ "fb", DBG_FB, "Framebuffer (for debugging)" },
{ "cbzb", DBG_CBZB, "Fast color clear info (for debugging)" },
{ "fakeocc", DBG_FAKE_OCC, "Use fake occlusion queries (for debugging)" },
{ "anisohq", DBG_ANISOHQ, "High quality anisotropic filtering (for benchmarking)" },
{ "notiling", DBG_NO_TILING, "Disable tiling (for benchmarking)" },
{ "noimmd", DBG_NO_IMMD, "Disable immediate mode (for benchmarking)" },
{ "stats", DBG_STATS, "Gather statistics" },
{ "hyperz", DBG_HYPERZ, "HyperZ (for debugging)" },
{ "fp", DBG_FP, "Log fragment program compilation" },
{ "vp", DBG_VP, "Log vertex program compilation" },
{ "draw", DBG_DRAW, "Log draw calls" },
{ "swtcl", DBG_SWTCL, "Log SWTCL-specific info" },
{ "rsblock", DBG_RS_BLOCK, "Log rasterizer registers" },
{ "psc", DBG_PSC, "Log vertex stream registers" },
{ "tex", DBG_TEX, "Log basic info about textures" },
{ "texalloc", DBG_TEXALLOC, "Log texture mipmap tree info" },
{ "fall", DBG_FALL, "Log fallbacks" },
{ "rs", DBG_RS, "Log rasterizer" },
{ "fb", DBG_FB, "Log framebuffer" },
{ "cbzb", DBG_CBZB, "Log fast color clear info" },
{ "stats", DBG_STATS, "Log emission statistics" },
{ "hyperz", DBG_HYPERZ, "Log HyperZ info" },
{ "fakeocc", DBG_FAKE_OCC, "Use fake occlusion queries" },
{ "anisohq", DBG_ANISOHQ, "Use high quality anisotropic filtering" },
{ "notiling", DBG_NO_TILING, "Disable tiling" },
{ "noimmd", DBG_NO_IMMD, "Disable immediate mode" },
{ "noopt", DBG_NO_OPT, "Disable shader optimizations" },
/* must be last */
DEBUG_NAMED_VALUE_END

View File

@ -385,6 +385,7 @@ static void r300_translate_fragment_shader(
compiler.code = &shader->code;
compiler.state = shader->compare_state;
compiler.Base.is_r500 = r300->screen->caps.is_r500;
compiler.Base.disable_optimizations = DBG_ON(r300, DBG_NO_OPT);
compiler.Base.has_half_swizzles = TRUE;
compiler.Base.max_temp_regs = compiler.Base.is_r500 ? 128 : 32;
compiler.Base.max_constants = compiler.Base.is_r500 ? 256 : 32;

View File

@ -97,6 +97,7 @@ r300_winsys_screen(struct pipe_screen *screen) {
#define DBG_NO_TILING (1 << 17)
#define DBG_NO_IMMD (1 << 18)
#define DBG_FAKE_OCC (1 << 19)
#define DBG_NO_OPT (1 << 20)
/* Statistics. */
#define DBG_STATS (1 << 24)
/*@}*/

View File

@ -205,6 +205,7 @@ void r300_translate_vertex_shader(struct r300_context *r300,
compiler.code = &vs->code;
compiler.UserData = vs;
compiler.Base.is_r500 = r300->screen->caps.is_r500;
compiler.Base.disable_optimizations = DBG_ON(r300, DBG_NO_OPT);
compiler.Base.has_half_swizzles = FALSE;
compiler.Base.max_temp_regs = 32;
compiler.Base.max_constants = 256;

View File

@ -94,6 +94,7 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
{
int is_r500 = c->Base.is_r500;
int kill_consts = c->Base.remove_unused_constants;
int opt = !c->Base.disable_optimizations;
/* Lists of instruction transformations. */
struct radeon_program_transformation rewrite_tex[] = {
@ -128,9 +129,9 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
{"transform TEX", 1, 1, rc_local_transform, rewrite_tex},
{"native rewrite", 1, is_r500, rc_local_transform, native_rewrite_r500},
{"native rewrite", 1, !is_r500, rc_local_transform, native_rewrite_r300},
{"deadcode", 1, 1, rc_dataflow_deadcode, dataflow_outputs_mark_use},
{"deadcode", 1, opt, rc_dataflow_deadcode, dataflow_outputs_mark_use},
{"emulate loops", 1, !is_r500, rc_emulate_loops, NULL},
{"dataflow optimize", 1, 1, rc_optimize, NULL},
{"dataflow optimize", 1, opt, rc_optimize, NULL},
{"dataflow swizzles", 1, 1, rc_dataflow_swizzles, NULL},
{"dead constants", 1, kill_consts, rc_remove_unused_constants, &c->code->constants_remap_table},
/* This pass makes it easier for the scheduler to group TEX
@ -139,7 +140,7 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
{"register rename", 1, !is_r500, rc_rename_regs, NULL},
{"pair translate", 1, 1, rc_pair_translate, NULL},
{"pair scheduling", 1, 1, rc_pair_schedule, NULL},
{"register allocation", 1, 1, rc_pair_regalloc, NULL},
{"register allocation", 1, opt, rc_pair_regalloc, NULL},
{"final code validation", 0, 1, rc_validate_final_shader, NULL},
{"machine code generation", 0, is_r500, r500BuildFragmentProgramHwCode, NULL},
{"machine code generation", 0, !is_r500, r300BuildFragmentProgramHwCode, NULL},

View File

@ -991,6 +991,7 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c)
{
int is_r500 = c->Base.is_r500;
int kill_consts = c->Base.remove_unused_constants;
int opt = !c->Base.disable_optimizations;
/* Lists of instruction transformations. */
struct radeon_program_transformation alu_rewrite_r500[] = {
@ -1029,12 +1030,12 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c)
{"native rewrite", 1, is_r500, rc_local_transform, alu_rewrite_r500},
{"native rewrite", 1, !is_r500, rc_local_transform, alu_rewrite_r300},
{"emulate modifiers", 1, !is_r500, rc_local_transform, emulate_modifiers},
{"deadcode", 1, 1, rc_dataflow_deadcode, dataflow_outputs_mark_used},
{"dataflow optimize", 1, 1, rc_optimize, NULL},
{"deadcode", 1, opt, rc_dataflow_deadcode, dataflow_outputs_mark_used},
{"dataflow optimize", 1, opt, rc_optimize, NULL},
/* This pass must be done after optimizations. */
{"source conflict resolve", 1, 1, rc_local_transform, resolve_src_conflicts},
{"dataflow swizzles", 1, 1, rc_dataflow_swizzles, NULL},
{"register allocation", 1, 1, allocate_temporary_registers, NULL},
{"register allocation", 1, opt, allocate_temporary_registers, NULL},
{"dead constants", 1, kill_consts, rc_remove_unused_constants, &c->code->constants_remap_table},
{"final code validation", 0, 1, rc_validate_final_shader, NULL},
{"machine code generation", 0, 1, translate_vertex_program, NULL},

View File

@ -42,6 +42,7 @@ struct radeon_compiler {
/* Hardware specification. */
unsigned is_r500:1;
unsigned has_half_swizzles:1;
unsigned disable_optimizations:1;
unsigned max_temp_regs;
unsigned max_constants;
int max_alu_insts;

View File

@ -89,6 +89,7 @@ static void create_vertex_program(struct r300_context *r300)
compiler.SetHwInputOutput = vp_ins_outs;
compiler.code = &r300->blit.vp_code;
compiler.Base.is_r500 = r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515;
compiler.Base.disable_optimizations = 0;
compiler.Base.has_half_swizzles = 0;
compiler.Base.max_temp_regs = 32;
compiler.Base.max_constants = 256;
@ -124,6 +125,7 @@ static void create_fragment_program(struct r300_context *r300)
compiler.OutputDepth = FRAG_RESULT_DEPTH;
compiler.enable_shadow_ambient = GL_TRUE;
compiler.Base.is_r500 = (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515);
compiler.Base.disable_optimizations = 0;
compiler.Base.has_half_swizzles = 1;
compiler.Base.max_temp_regs = (compiler.Base.is_r500) ? 128 : 32;
compiler.Base.max_constants = compiler.Base.is_r500 ? 256 : 32;

View File

@ -220,6 +220,7 @@ static void translate_fragment_program(GLcontext *ctx, struct r300_fragment_prog
compiler.state = fp->state;
compiler.enable_shadow_ambient = GL_TRUE;
compiler.Base.is_r500 = (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) ? GL_TRUE : GL_FALSE;
compiler.Base.disable_optimizations = 0;
compiler.Base.has_half_swizzles = 1;
compiler.Base.max_temp_regs = (compiler.Base.is_r500) ? 128 : 32;
compiler.Base.max_constants = compiler.Base.is_r500 ? 256 : 32;

View File

@ -245,6 +245,7 @@ static struct r300_vertex_program *build_program(GLcontext *ctx,
compiler.RequiredOutputs = compute_required_outputs(vp->Base, vp->key.FpReads);
compiler.SetHwInputOutput = &t_inputs_outputs;
compiler.Base.is_r500 = R300_CONTEXT(ctx)->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515;
compiler.Base.disable_optimizations = 0;
compiler.Base.has_half_swizzles = 0;
compiler.Base.max_temp_regs = 32;
compiler.Base.max_constants = 256;