r300/compiler: add new compiler parameter max_alu_insts
This commit is contained in:
parent
a0fb406d9f
commit
313e95f0c7
|
@ -386,6 +386,7 @@ static void r300_translate_fragment_shader(
|
|||
compiler.state = shader->compare_state;
|
||||
compiler.Base.is_r500 = r300->screen->caps.is_r500;
|
||||
compiler.Base.max_temp_regs = compiler.Base.is_r500 ? 128 : 32;
|
||||
compiler.Base.max_alu_insts = compiler.Base.is_r500 ? 512 : 64;
|
||||
compiler.Base.remove_unused_constants = TRUE;
|
||||
compiler.AllocateHwInputs = &allocate_hardware_inputs;
|
||||
compiler.UserData = &shader->inputs;
|
||||
|
|
|
@ -206,6 +206,7 @@ void r300_translate_vertex_shader(struct r300_context *r300,
|
|||
compiler.UserData = vs;
|
||||
compiler.Base.is_r500 = r300->screen->caps.is_r500;
|
||||
compiler.Base.max_temp_regs = 32;
|
||||
compiler.Base.max_alu_insts = r300->screen->caps.is_r500 ? 1024 : 256;
|
||||
compiler.Base.remove_unused_constants = TRUE;
|
||||
|
||||
if (compiler.Base.Debug) {
|
||||
|
|
|
@ -135,7 +135,7 @@ static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* i
|
|||
{
|
||||
PROG_CODE;
|
||||
|
||||
if (code->alu.length >= R300_PFS_MAX_ALU_INST) {
|
||||
if (code->alu.length >= c->Base.max_alu_insts) {
|
||||
error("Too many ALU instructions");
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -107,12 +107,11 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
|
|||
|
||||
debug_program_log(c, "before compilation");
|
||||
|
||||
if (c->Base.is_r500){
|
||||
rc_unroll_loops(&c->Base, R500_PFS_MAX_INST);
|
||||
if (c->Base.is_r500) {
|
||||
rc_unroll_loops(&c->Base);
|
||||
debug_program_log(c, "after unroll loops");
|
||||
}
|
||||
else{
|
||||
rc_transform_loops(&c->Base, -1);
|
||||
} else {
|
||||
rc_transform_loops(&c->Base);
|
||||
debug_program_log(c, "after transform loops");
|
||||
|
||||
rc_emulate_branches(&c->Base);
|
||||
|
@ -165,7 +164,7 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
|
|||
debug_program_log(c, "after deadcode");
|
||||
|
||||
if (!c->Base.is_r500) {
|
||||
rc_emulate_loops(&c->Base, R300_PFS_MAX_ALU_INST);
|
||||
rc_emulate_loops(&c->Base);
|
||||
debug_program_log(c, "after emulate loops");
|
||||
}
|
||||
|
||||
|
|
|
@ -985,10 +985,7 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c)
|
|||
|
||||
debug_program_log(c, "before compilation");
|
||||
|
||||
if (c->Base.is_r500)
|
||||
rc_transform_loops(&c->Base, R500_VS_MAX_ALU);
|
||||
else
|
||||
rc_transform_loops(&c->Base, R300_VS_MAX_ALU);
|
||||
rc_transform_loops(&c->Base);
|
||||
if (c->Base.Error)
|
||||
return;
|
||||
|
||||
|
|
|
@ -229,7 +229,7 @@ static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair
|
|||
{
|
||||
PROG_CODE;
|
||||
|
||||
if (code->inst_end >= 511) {
|
||||
if (code->inst_end >= c->Base.max_alu_insts-1) {
|
||||
error("emit_alu: Too many instructions");
|
||||
return;
|
||||
}
|
||||
|
@ -322,7 +322,7 @@ static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_inst
|
|||
{
|
||||
PROG_CODE;
|
||||
|
||||
if (code->inst_end >= 511) {
|
||||
if (code->inst_end >= c->Base.max_alu_insts-1) {
|
||||
error("emit_tex: Too many instructions");
|
||||
return 0;
|
||||
}
|
||||
|
@ -370,7 +370,7 @@ static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_inst
|
|||
|
||||
static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst)
|
||||
{
|
||||
if (s->Code->inst_end >= 511) {
|
||||
if (s->Code->inst_end >= s->C->max_alu_insts-1) {
|
||||
rc_error(s->C, "emit_tex: Too many instructions");
|
||||
return;
|
||||
}
|
||||
|
@ -577,7 +577,7 @@ void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compi
|
|||
}
|
||||
}
|
||||
|
||||
if (code->max_temp_idx >= 128)
|
||||
if (code->max_temp_idx >= compiler->Base.max_temp_regs)
|
||||
rc_error(&compiler->Base, "Too many hardware temporaries used");
|
||||
|
||||
if (compiler->Base.Error)
|
||||
|
@ -587,7 +587,7 @@ void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compi
|
|||
(code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) {
|
||||
/* This may happen when dead-code elimination is disabled or
|
||||
* when most of the fragment program logic is leading to a KIL */
|
||||
if (code->inst_end >= 511) {
|
||||
if (code->inst_end >= compiler->Base.max_alu_insts-1) {
|
||||
rc_error(&compiler->Base, "Introducing fake OUT: Too many instructions");
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -42,6 +42,7 @@ struct radeon_compiler {
|
|||
/* Hardware specification. */
|
||||
unsigned is_r500:1;
|
||||
unsigned max_temp_regs;
|
||||
int max_alu_insts;
|
||||
|
||||
/* Whether to remove unused constants and empty holes in constant space. */
|
||||
unsigned remove_unused_constants:1;
|
||||
|
|
|
@ -78,12 +78,12 @@ static int src_reg_is_immediate(struct rc_src_register * src,
|
|||
}
|
||||
|
||||
static unsigned int loop_max_possible_iterations(struct radeon_compiler *c,
|
||||
struct loop_info * loop, unsigned int prog_inst_limit)
|
||||
struct loop_info * loop)
|
||||
{
|
||||
unsigned int total_i = rc_recompute_ips(c);
|
||||
unsigned int loop_i = (loop->EndLoop->IP - loop->BeginLoop->IP) - 1;
|
||||
/* +1 because the program already has one iteration of the loop. */
|
||||
return 1 + ((prog_inst_limit - total_i) / loop_i);
|
||||
return 1 + ((c->max_alu_insts - total_i) / loop_i);
|
||||
}
|
||||
|
||||
static void unroll_loop(struct radeon_compiler * c, struct loop_info * loop,
|
||||
|
@ -187,11 +187,10 @@ static void get_incr_amount(void * data, struct rc_instruction * inst,
|
|||
}
|
||||
|
||||
/**
|
||||
* If prog_inst_limit is -1, then all eligible loops will be unrolled regardless
|
||||
* If c->max_alu_insts is -1, then all eligible loops will be unrolled regardless
|
||||
* of how many iterations they have.
|
||||
*/
|
||||
static int try_unroll_loop(struct radeon_compiler * c, struct loop_info * loop,
|
||||
unsigned int prog_inst_limit)
|
||||
static int try_unroll_loop(struct radeon_compiler * c, struct loop_info * loop)
|
||||
{
|
||||
int end_loops;
|
||||
int iterations;
|
||||
|
@ -300,9 +299,8 @@ static int try_unroll_loop(struct radeon_compiler * c, struct loop_info * loop,
|
|||
return 0;
|
||||
}
|
||||
|
||||
if (prog_inst_limit > 0
|
||||
&& iterations > loop_max_possible_iterations(c, loop,
|
||||
prog_inst_limit)) {
|
||||
if (c->max_alu_insts > 0
|
||||
&& iterations > loop_max_possible_iterations(c, loop)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -436,7 +434,7 @@ static int transform_loop(struct emulate_loop_state * s,
|
|||
if (!build_loop_info(s->C, loop, inst))
|
||||
return 0;
|
||||
|
||||
if(try_unroll_loop(s->C, loop, s->prog_inst_limit)){
|
||||
if(try_unroll_loop(s->C, loop)){
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -472,14 +470,13 @@ static int transform_loop(struct emulate_loop_state * s,
|
|||
return 1;
|
||||
}
|
||||
|
||||
void rc_transform_loops(struct radeon_compiler *c, int prog_inst_limit)
|
||||
void rc_transform_loops(struct radeon_compiler *c)
|
||||
{
|
||||
struct emulate_loop_state * s = &c->loop_state;
|
||||
struct rc_instruction * ptr;
|
||||
|
||||
memset(s, 0, sizeof(struct emulate_loop_state));
|
||||
s->C = c;
|
||||
s->prog_inst_limit = prog_inst_limit;
|
||||
for(ptr = s->C->Program.Instructions.Next;
|
||||
ptr != &s->C->Program.Instructions; ptr = ptr->Next) {
|
||||
if(ptr->Type == RC_INSTRUCTION_NORMAL &&
|
||||
|
@ -490,7 +487,7 @@ void rc_transform_loops(struct radeon_compiler *c, int prog_inst_limit)
|
|||
}
|
||||
}
|
||||
|
||||
void rc_unroll_loops(struct radeon_compiler *c, int prog_inst_limit)
|
||||
void rc_unroll_loops(struct radeon_compiler *c)
|
||||
{
|
||||
struct rc_instruction * inst;
|
||||
struct loop_info loop;
|
||||
|
@ -500,13 +497,13 @@ void rc_unroll_loops(struct radeon_compiler *c, int prog_inst_limit)
|
|||
|
||||
if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP) {
|
||||
if (build_loop_info(c, &loop, inst)) {
|
||||
try_unroll_loop(c, &loop, prog_inst_limit);
|
||||
try_unroll_loop(c, &loop);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void rc_emulate_loops(struct radeon_compiler *c, int prog_inst_limit)
|
||||
void rc_emulate_loops(struct radeon_compiler *c)
|
||||
{
|
||||
struct emulate_loop_state * s = &c->loop_state;
|
||||
int i;
|
||||
|
@ -518,7 +515,7 @@ void rc_emulate_loops(struct radeon_compiler *c, int prog_inst_limit)
|
|||
continue;
|
||||
}
|
||||
unsigned int iterations = loop_max_possible_iterations(
|
||||
s->C, &s->Loops[i], prog_inst_limit);
|
||||
s->C, &s->Loops[i]);
|
||||
unroll_loop(s->C, &s->Loops[i], iterations);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -21,13 +21,12 @@ struct emulate_loop_state {
|
|||
struct loop_info * Loops;
|
||||
unsigned int LoopCount;
|
||||
unsigned int LoopReserved;
|
||||
int prog_inst_limit;
|
||||
};
|
||||
|
||||
void rc_transform_loops(struct radeon_compiler *c, int prog_inst_limit);
|
||||
void rc_transform_loops(struct radeon_compiler *c);
|
||||
|
||||
void rc_unroll_loops(struct radeon_compiler * c, int prog_inst_limit);
|
||||
void rc_unroll_loops(struct radeon_compiler * c);
|
||||
|
||||
void rc_emulate_loops(struct radeon_compiler * c, int prog_inst_limit);
|
||||
void rc_emulate_loops(struct radeon_compiler * c);
|
||||
|
||||
#endif /* RADEON_EMULATE_LOOPS_H */
|
||||
|
|
|
@ -88,6 +88,9 @@ static void create_vertex_program(struct r300_context *r300)
|
|||
compiler.RequiredOutputs = compiler.Base.Program.OutputsWritten = (1 << VERT_RESULT_HPOS) | (1 << VERT_RESULT_TEX0);
|
||||
compiler.SetHwInputOutput = vp_ins_outs;
|
||||
compiler.code = &r300->blit.vp_code;
|
||||
compiler.Base.is_r500 = r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515;
|
||||
compiler.Base.max_temp_regs = 32;
|
||||
compiler.Base.max_alu_insts = compiler.Base.is_r500 ? 1024 : 256;
|
||||
|
||||
r3xx_compile_vertex_program(&compiler);
|
||||
}
|
||||
|
@ -120,6 +123,7 @@ static void create_fragment_program(struct r300_context *r300)
|
|||
compiler.enable_shadow_ambient = GL_TRUE;
|
||||
compiler.Base.is_r500 = (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515);
|
||||
compiler.Base.max_temp_regs = (compiler.Base.is_r500) ? 128 : 32;
|
||||
compiler.Base.max_alu_insts = compiler.Base.is_r500 ? 512 : 64;
|
||||
compiler.code = &r300->blit.fp_code;
|
||||
compiler.AllocateHwInputs = fp_allocate_hw_inputs;
|
||||
|
||||
|
|
|
@ -221,6 +221,7 @@ static void translate_fragment_program(GLcontext *ctx, struct r300_fragment_prog
|
|||
compiler.enable_shadow_ambient = GL_TRUE;
|
||||
compiler.Base.is_r500 = (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) ? GL_TRUE : GL_FALSE;
|
||||
compiler.Base.max_temp_regs = (compiler.Base.is_r500) ? 128 : 32;
|
||||
compiler.Base.max_alu_insts = compiler.Base.is_r500 ? 512 : 64;
|
||||
compiler.OutputDepth = FRAG_RESULT_DEPTH;
|
||||
memset(compiler.OutputColor, 0, 4 * sizeof(unsigned));
|
||||
compiler.OutputColor[0] = FRAG_RESULT_COLOR;
|
||||
|
|
|
@ -244,6 +244,9 @@ static struct r300_vertex_program *build_program(GLcontext *ctx,
|
|||
compiler.code = &vp->code;
|
||||
compiler.RequiredOutputs = compute_required_outputs(vp->Base, vp->key.FpReads);
|
||||
compiler.SetHwInputOutput = &t_inputs_outputs;
|
||||
compiler.Base.is_r500 = R300_CONTEXT(ctx)->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515;
|
||||
compiler.Base.max_temp_regs = 32;
|
||||
compiler.Base.max_alu_insts = compiler.Base.is_r500 ? 1024 : 256;
|
||||
|
||||
if (compiler.Base.Debug) {
|
||||
fprintf(stderr, "Initial vertex program:\n");
|
||||
|
|
Loading…
Reference in New Issue