r300/compiler: add new compiler parameter max_alu_insts

This commit is contained in:
Marek Olšák 2010-09-01 00:56:57 +02:00
parent a0fb406d9f
commit 313e95f0c7
12 changed files with 38 additions and 35 deletions

View File

@ -386,6 +386,7 @@ static void r300_translate_fragment_shader(
compiler.state = shader->compare_state;
compiler.Base.is_r500 = r300->screen->caps.is_r500;
compiler.Base.max_temp_regs = compiler.Base.is_r500 ? 128 : 32;
compiler.Base.max_alu_insts = compiler.Base.is_r500 ? 512 : 64;
compiler.Base.remove_unused_constants = TRUE;
compiler.AllocateHwInputs = &allocate_hardware_inputs;
compiler.UserData = &shader->inputs;

View File

@ -206,6 +206,7 @@ void r300_translate_vertex_shader(struct r300_context *r300,
compiler.UserData = vs;
compiler.Base.is_r500 = r300->screen->caps.is_r500;
compiler.Base.max_temp_regs = 32;
compiler.Base.max_alu_insts = r300->screen->caps.is_r500 ? 1024 : 256;
compiler.Base.remove_unused_constants = TRUE;
if (compiler.Base.Debug) {

View File

@ -135,7 +135,7 @@ static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* i
{
PROG_CODE;
if (code->alu.length >= R300_PFS_MAX_ALU_INST) {
if (code->alu.length >= c->Base.max_alu_insts) {
error("Too many ALU instructions");
return 0;
}

View File

@ -107,12 +107,11 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
debug_program_log(c, "before compilation");
if (c->Base.is_r500){
rc_unroll_loops(&c->Base, R500_PFS_MAX_INST);
if (c->Base.is_r500) {
rc_unroll_loops(&c->Base);
debug_program_log(c, "after unroll loops");
}
else{
rc_transform_loops(&c->Base, -1);
} else {
rc_transform_loops(&c->Base);
debug_program_log(c, "after transform loops");
rc_emulate_branches(&c->Base);
@ -165,7 +164,7 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
debug_program_log(c, "after deadcode");
if (!c->Base.is_r500) {
rc_emulate_loops(&c->Base, R300_PFS_MAX_ALU_INST);
rc_emulate_loops(&c->Base);
debug_program_log(c, "after emulate loops");
}

View File

@ -985,10 +985,7 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c)
debug_program_log(c, "before compilation");
if (c->Base.is_r500)
rc_transform_loops(&c->Base, R500_VS_MAX_ALU);
else
rc_transform_loops(&c->Base, R300_VS_MAX_ALU);
rc_transform_loops(&c->Base);
if (c->Base.Error)
return;

View File

@ -229,7 +229,7 @@ static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair
{
PROG_CODE;
if (code->inst_end >= 511) {
if (code->inst_end >= c->Base.max_alu_insts-1) {
error("emit_alu: Too many instructions");
return;
}
@ -322,7 +322,7 @@ static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_inst
{
PROG_CODE;
if (code->inst_end >= 511) {
if (code->inst_end >= c->Base.max_alu_insts-1) {
error("emit_tex: Too many instructions");
return 0;
}
@ -370,7 +370,7 @@ static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_inst
static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst)
{
if (s->Code->inst_end >= 511) {
if (s->Code->inst_end >= s->C->max_alu_insts-1) {
rc_error(s->C, "emit_tex: Too many instructions");
return;
}
@ -577,7 +577,7 @@ void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compi
}
}
if (code->max_temp_idx >= 128)
if (code->max_temp_idx >= compiler->Base.max_temp_regs)
rc_error(&compiler->Base, "Too many hardware temporaries used");
if (compiler->Base.Error)
@ -587,7 +587,7 @@ void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compi
(code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) {
/* This may happen when dead-code elimination is disabled or
* when most of the fragment program logic is leading to a KIL */
if (code->inst_end >= 511) {
if (code->inst_end >= compiler->Base.max_alu_insts-1) {
rc_error(&compiler->Base, "Introducing fake OUT: Too many instructions");
return;
}

View File

@ -42,6 +42,7 @@ struct radeon_compiler {
/* Hardware specification. */
unsigned is_r500:1;
unsigned max_temp_regs;
int max_alu_insts;
/* Whether to remove unused constants and empty holes in constant space. */
unsigned remove_unused_constants:1;

View File

@ -78,12 +78,12 @@ static int src_reg_is_immediate(struct rc_src_register * src,
}
static unsigned int loop_max_possible_iterations(struct radeon_compiler *c,
struct loop_info * loop, unsigned int prog_inst_limit)
struct loop_info * loop)
{
unsigned int total_i = rc_recompute_ips(c);
unsigned int loop_i = (loop->EndLoop->IP - loop->BeginLoop->IP) - 1;
/* +1 because the program already has one iteration of the loop. */
return 1 + ((prog_inst_limit - total_i) / loop_i);
return 1 + ((c->max_alu_insts - total_i) / loop_i);
}
static void unroll_loop(struct radeon_compiler * c, struct loop_info * loop,
@ -187,11 +187,10 @@ static void get_incr_amount(void * data, struct rc_instruction * inst,
}
/**
* If prog_inst_limit is -1, then all eligible loops will be unrolled regardless
* If c->max_alu_insts is -1, then all eligible loops will be unrolled regardless
* of how many iterations they have.
*/
static int try_unroll_loop(struct radeon_compiler * c, struct loop_info * loop,
unsigned int prog_inst_limit)
static int try_unroll_loop(struct radeon_compiler * c, struct loop_info * loop)
{
int end_loops;
int iterations;
@ -300,9 +299,8 @@ static int try_unroll_loop(struct radeon_compiler * c, struct loop_info * loop,
return 0;
}
if (prog_inst_limit > 0
&& iterations > loop_max_possible_iterations(c, loop,
prog_inst_limit)) {
if (c->max_alu_insts > 0
&& iterations > loop_max_possible_iterations(c, loop)) {
return 0;
}
@ -436,7 +434,7 @@ static int transform_loop(struct emulate_loop_state * s,
if (!build_loop_info(s->C, loop, inst))
return 0;
if(try_unroll_loop(s->C, loop, s->prog_inst_limit)){
if(try_unroll_loop(s->C, loop)){
return 1;
}
@ -472,14 +470,13 @@ static int transform_loop(struct emulate_loop_state * s,
return 1;
}
void rc_transform_loops(struct radeon_compiler *c, int prog_inst_limit)
void rc_transform_loops(struct radeon_compiler *c)
{
struct emulate_loop_state * s = &c->loop_state;
struct rc_instruction * ptr;
memset(s, 0, sizeof(struct emulate_loop_state));
s->C = c;
s->prog_inst_limit = prog_inst_limit;
for(ptr = s->C->Program.Instructions.Next;
ptr != &s->C->Program.Instructions; ptr = ptr->Next) {
if(ptr->Type == RC_INSTRUCTION_NORMAL &&
@ -490,7 +487,7 @@ void rc_transform_loops(struct radeon_compiler *c, int prog_inst_limit)
}
}
void rc_unroll_loops(struct radeon_compiler *c, int prog_inst_limit)
void rc_unroll_loops(struct radeon_compiler *c)
{
struct rc_instruction * inst;
struct loop_info loop;
@ -500,13 +497,13 @@ void rc_unroll_loops(struct radeon_compiler *c, int prog_inst_limit)
if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP) {
if (build_loop_info(c, &loop, inst)) {
try_unroll_loop(c, &loop, prog_inst_limit);
try_unroll_loop(c, &loop);
}
}
}
}
void rc_emulate_loops(struct radeon_compiler *c, int prog_inst_limit)
void rc_emulate_loops(struct radeon_compiler *c)
{
struct emulate_loop_state * s = &c->loop_state;
int i;
@ -518,7 +515,7 @@ void rc_emulate_loops(struct radeon_compiler *c, int prog_inst_limit)
continue;
}
unsigned int iterations = loop_max_possible_iterations(
s->C, &s->Loops[i], prog_inst_limit);
s->C, &s->Loops[i]);
unroll_loop(s->C, &s->Loops[i], iterations);
}
}

View File

@ -21,13 +21,12 @@ struct emulate_loop_state {
struct loop_info * Loops;
unsigned int LoopCount;
unsigned int LoopReserved;
int prog_inst_limit;
};
void rc_transform_loops(struct radeon_compiler *c, int prog_inst_limit);
void rc_transform_loops(struct radeon_compiler *c);
void rc_unroll_loops(struct radeon_compiler * c, int prog_inst_limit);
void rc_unroll_loops(struct radeon_compiler * c);
void rc_emulate_loops(struct radeon_compiler * c, int prog_inst_limit);
void rc_emulate_loops(struct radeon_compiler * c);
#endif /* RADEON_EMULATE_LOOPS_H */

View File

@ -88,6 +88,9 @@ static void create_vertex_program(struct r300_context *r300)
compiler.RequiredOutputs = compiler.Base.Program.OutputsWritten = (1 << VERT_RESULT_HPOS) | (1 << VERT_RESULT_TEX0);
compiler.SetHwInputOutput = vp_ins_outs;
compiler.code = &r300->blit.vp_code;
compiler.Base.is_r500 = r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515;
compiler.Base.max_temp_regs = 32;
compiler.Base.max_alu_insts = compiler.Base.is_r500 ? 1024 : 256;
r3xx_compile_vertex_program(&compiler);
}
@ -120,6 +123,7 @@ static void create_fragment_program(struct r300_context *r300)
compiler.enable_shadow_ambient = GL_TRUE;
compiler.Base.is_r500 = (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515);
compiler.Base.max_temp_regs = (compiler.Base.is_r500) ? 128 : 32;
compiler.Base.max_alu_insts = compiler.Base.is_r500 ? 512 : 64;
compiler.code = &r300->blit.fp_code;
compiler.AllocateHwInputs = fp_allocate_hw_inputs;

View File

@ -221,6 +221,7 @@ static void translate_fragment_program(GLcontext *ctx, struct r300_fragment_prog
compiler.enable_shadow_ambient = GL_TRUE;
compiler.Base.is_r500 = (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) ? GL_TRUE : GL_FALSE;
compiler.Base.max_temp_regs = (compiler.Base.is_r500) ? 128 : 32;
compiler.Base.max_alu_insts = compiler.Base.is_r500 ? 512 : 64;
compiler.OutputDepth = FRAG_RESULT_DEPTH;
memset(compiler.OutputColor, 0, 4 * sizeof(unsigned));
compiler.OutputColor[0] = FRAG_RESULT_COLOR;

View File

@ -244,6 +244,9 @@ static struct r300_vertex_program *build_program(GLcontext *ctx,
compiler.code = &vp->code;
compiler.RequiredOutputs = compute_required_outputs(vp->Base, vp->key.FpReads);
compiler.SetHwInputOutput = &t_inputs_outputs;
compiler.Base.is_r500 = R300_CONTEXT(ctx)->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515;
compiler.Base.max_temp_regs = 32;
compiler.Base.max_alu_insts = compiler.Base.is_r500 ? 1024 : 256;
if (compiler.Base.Debug) {
fprintf(stderr, "Initial vertex program:\n");