1335 lines
39 KiB
C
1335 lines
39 KiB
C
/*
|
|
* Mesa 3-D graphics library
|
|
*
|
|
* Copyright (C) 2009 VMware, Inc. All Rights Reserved.
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be included
|
|
* in all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
|
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* VMWARE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
|
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
|
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
*/
|
|
|
|
|
|
|
|
#include "main/glheader.h"
|
|
#include "main/context.h"
|
|
#include "main/macros.h"
|
|
#include "program.h"
|
|
#include "prog_instruction.h"
|
|
#include "prog_optimize.h"
|
|
#include "prog_print.h"
|
|
|
|
|
|
#define MAX_LOOP_NESTING 50
|
|
/* MAX_PROGRAM_TEMPS is a low number (256), and we want to be able to
|
|
* register allocate many temporary values into that small number of
|
|
* temps. So allow large temporary indices coming into the register
|
|
* allocator.
|
|
*/
|
|
#define REG_ALLOCATE_MAX_PROGRAM_TEMPS ((1 << INST_INDEX_BITS) - 1)
|
|
|
|
static GLboolean dbg = GL_FALSE;
|
|
|
|
#define NO_MASK 0xf
|
|
|
|
/**
|
|
* Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which
|
|
* are read from the given src in this instruction, We also provide
|
|
* one optional masks which may mask other components in the dst
|
|
* register
|
|
*/
|
|
static GLuint
|
|
get_src_arg_mask(const struct prog_instruction *inst,
|
|
GLuint arg, GLuint dst_mask)
|
|
{
|
|
GLuint read_mask, channel_mask;
|
|
GLuint comp;
|
|
|
|
assert(arg < _mesa_num_inst_src_regs(inst->Opcode));
|
|
|
|
/* Form the dst register, find the written channels */
|
|
switch (inst->Opcode) {
|
|
case OPCODE_MOV:
|
|
case OPCODE_MIN:
|
|
case OPCODE_MAX:
|
|
case OPCODE_ABS:
|
|
case OPCODE_ADD:
|
|
case OPCODE_MAD:
|
|
case OPCODE_MUL:
|
|
case OPCODE_SUB:
|
|
case OPCODE_CMP:
|
|
case OPCODE_FLR:
|
|
case OPCODE_FRC:
|
|
case OPCODE_LRP:
|
|
case OPCODE_SGE:
|
|
case OPCODE_SLT:
|
|
case OPCODE_SSG:
|
|
channel_mask = inst->DstReg.WriteMask & dst_mask;
|
|
break;
|
|
case OPCODE_RCP:
|
|
case OPCODE_SIN:
|
|
case OPCODE_COS:
|
|
case OPCODE_RSQ:
|
|
case OPCODE_POW:
|
|
case OPCODE_EX2:
|
|
case OPCODE_LOG:
|
|
channel_mask = WRITEMASK_X;
|
|
break;
|
|
case OPCODE_DP2:
|
|
channel_mask = WRITEMASK_XY;
|
|
break;
|
|
case OPCODE_DP3:
|
|
case OPCODE_XPD:
|
|
channel_mask = WRITEMASK_XYZ;
|
|
break;
|
|
default:
|
|
channel_mask = WRITEMASK_XYZW;
|
|
break;
|
|
}
|
|
|
|
/* Now, given the src swizzle and the written channels, find which
|
|
* components are actually read
|
|
*/
|
|
read_mask = 0x0;
|
|
for (comp = 0; comp < 4; ++comp) {
|
|
const GLuint coord = GET_SWZ(inst->SrcReg[arg].Swizzle, comp);
|
|
if (channel_mask & (1 << comp) && coord <= SWIZZLE_W)
|
|
read_mask |= 1 << coord;
|
|
}
|
|
|
|
return read_mask;
|
|
}
|
|
|
|
|
|
/**
|
|
* For a MOV instruction, compute a write mask when src register also has
|
|
* a mask
|
|
*/
|
|
static GLuint
|
|
get_dst_mask_for_mov(const struct prog_instruction *mov, GLuint src_mask)
|
|
{
|
|
const GLuint mask = mov->DstReg.WriteMask;
|
|
GLuint comp;
|
|
GLuint updated_mask = 0x0;
|
|
|
|
assert(mov->Opcode == OPCODE_MOV);
|
|
|
|
for (comp = 0; comp < 4; ++comp) {
|
|
GLuint src_comp;
|
|
if ((mask & (1 << comp)) == 0)
|
|
continue;
|
|
src_comp = GET_SWZ(mov->SrcReg[0].Swizzle, comp);
|
|
if ((src_mask & (1 << src_comp)) == 0)
|
|
continue;
|
|
updated_mask |= 1 << comp;
|
|
}
|
|
|
|
return updated_mask;
|
|
}
|
|
|
|
|
|
/**
|
|
* Ensure that the swizzle is regular. That is, all of the swizzle
|
|
* terms are SWIZZLE_X,Y,Z,W and not SWIZZLE_ZERO or SWIZZLE_ONE.
|
|
*/
|
|
static GLboolean
|
|
is_swizzle_regular(GLuint swz)
|
|
{
|
|
return GET_SWZ(swz,0) <= SWIZZLE_W &&
|
|
GET_SWZ(swz,1) <= SWIZZLE_W &&
|
|
GET_SWZ(swz,2) <= SWIZZLE_W &&
|
|
GET_SWZ(swz,3) <= SWIZZLE_W;
|
|
}
|
|
|
|
|
|
/**
|
|
* In 'prog' remove instruction[i] if removeFlags[i] == TRUE.
|
|
* \return number of instructions removed
|
|
*/
|
|
static GLuint
|
|
remove_instructions(struct gl_program *prog, const GLboolean *removeFlags,
|
|
void *mem_ctx)
|
|
{
|
|
GLint i, removeEnd = 0, removeCount = 0;
|
|
GLuint totalRemoved = 0;
|
|
|
|
/* go backward */
|
|
for (i = prog->arb.NumInstructions - 1; i >= 0; i--) {
|
|
if (removeFlags[i]) {
|
|
totalRemoved++;
|
|
if (removeCount == 0) {
|
|
/* begin a run of instructions to remove */
|
|
removeEnd = i;
|
|
removeCount = 1;
|
|
}
|
|
else {
|
|
/* extend the run of instructions to remove */
|
|
removeCount++;
|
|
}
|
|
}
|
|
else {
|
|
/* don't remove this instruction, but check if the preceeding
|
|
* instructions are to be removed.
|
|
*/
|
|
if (removeCount > 0) {
|
|
GLint removeStart = removeEnd - removeCount + 1;
|
|
_mesa_delete_instructions(prog, removeStart, removeCount, mem_ctx);
|
|
removeStart = removeCount = 0; /* reset removal info */
|
|
}
|
|
}
|
|
}
|
|
/* Finish removing if the first instruction was to be removed. */
|
|
if (removeCount > 0) {
|
|
GLint removeStart = removeEnd - removeCount + 1;
|
|
_mesa_delete_instructions(prog, removeStart, removeCount, mem_ctx);
|
|
}
|
|
return totalRemoved;
|
|
}
|
|
|
|
|
|
/**
|
|
* Remap register indexes according to map.
|
|
* \param prog the program to search/replace
|
|
* \param file the type of register file to search/replace
|
|
* \param map maps old register indexes to new indexes
|
|
*/
|
|
static void
|
|
replace_regs(struct gl_program *prog, gl_register_file file, const GLint map[])
|
|
{
|
|
GLuint i;
|
|
|
|
for (i = 0; i < prog->arb.NumInstructions; i++) {
|
|
struct prog_instruction *inst = prog->arb.Instructions + i;
|
|
const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
|
|
GLuint j;
|
|
for (j = 0; j < numSrc; j++) {
|
|
if (inst->SrcReg[j].File == file) {
|
|
GLuint index = inst->SrcReg[j].Index;
|
|
assert(map[index] >= 0);
|
|
inst->SrcReg[j].Index = map[index];
|
|
}
|
|
}
|
|
if (inst->DstReg.File == file) {
|
|
const GLuint index = inst->DstReg.Index;
|
|
assert(map[index] >= 0);
|
|
inst->DstReg.Index = map[index];
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
* Remove dead instructions from the given program.
|
|
* This is very primitive for now. Basically look for temp registers
|
|
* that are written to but never read. Remove any instructions that
|
|
* write to such registers. Be careful with condition code setters.
|
|
*/
|
|
static GLboolean
|
|
_mesa_remove_dead_code_global(struct gl_program *prog, void *mem_ctx)
|
|
{
|
|
GLboolean tempRead[REG_ALLOCATE_MAX_PROGRAM_TEMPS][4];
|
|
GLboolean *removeInst; /* per-instruction removal flag */
|
|
GLuint i, rem = 0, comp;
|
|
|
|
memset(tempRead, 0, sizeof(tempRead));
|
|
|
|
if (dbg) {
|
|
printf("Optimize: Begin dead code removal\n");
|
|
/*_mesa_print_program(prog);*/
|
|
}
|
|
|
|
removeInst =
|
|
calloc(prog->arb.NumInstructions, sizeof(GLboolean));
|
|
|
|
/* Determine which temps are read and written */
|
|
for (i = 0; i < prog->arb.NumInstructions; i++) {
|
|
const struct prog_instruction *inst = prog->arb.Instructions + i;
|
|
const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
|
|
GLuint j;
|
|
|
|
/* check src regs */
|
|
for (j = 0; j < numSrc; j++) {
|
|
if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
|
|
const GLuint index = inst->SrcReg[j].Index;
|
|
GLuint read_mask;
|
|
assert(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
|
|
read_mask = get_src_arg_mask(inst, j, NO_MASK);
|
|
|
|
if (inst->SrcReg[j].RelAddr) {
|
|
if (dbg)
|
|
printf("abort remove dead code (indirect temp)\n");
|
|
goto done;
|
|
}
|
|
|
|
for (comp = 0; comp < 4; comp++) {
|
|
const GLuint swz = GET_SWZ(inst->SrcReg[j].Swizzle, comp);
|
|
if (swz <= SWIZZLE_W) {
|
|
if ((read_mask & (1 << swz)) == 0)
|
|
continue;
|
|
tempRead[index][swz] = GL_TRUE;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/* check dst reg */
|
|
if (inst->DstReg.File == PROGRAM_TEMPORARY) {
|
|
assert(inst->DstReg.Index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
|
|
|
|
if (inst->DstReg.RelAddr) {
|
|
if (dbg)
|
|
printf("abort remove dead code (indirect temp)\n");
|
|
goto done;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* find instructions that write to dead registers, flag for removal */
|
|
for (i = 0; i < prog->arb.NumInstructions; i++) {
|
|
struct prog_instruction *inst = prog->arb.Instructions + i;
|
|
const GLuint numDst = _mesa_num_inst_dst_regs(inst->Opcode);
|
|
|
|
if (numDst != 0 && inst->DstReg.File == PROGRAM_TEMPORARY) {
|
|
GLint chan, index = inst->DstReg.Index;
|
|
|
|
for (chan = 0; chan < 4; chan++) {
|
|
if (!tempRead[index][chan] &&
|
|
inst->DstReg.WriteMask & (1 << chan)) {
|
|
if (dbg) {
|
|
printf("Remove writemask on %u.%c\n", i,
|
|
chan == 3 ? 'w' : 'x' + chan);
|
|
}
|
|
inst->DstReg.WriteMask &= ~(1 << chan);
|
|
rem++;
|
|
}
|
|
}
|
|
|
|
if (inst->DstReg.WriteMask == 0) {
|
|
/* If we cleared all writes, the instruction can be removed. */
|
|
if (dbg)
|
|
printf("Remove instruction %u: \n", i);
|
|
removeInst[i] = GL_TRUE;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* now remove the instructions which aren't needed */
|
|
rem = remove_instructions(prog, removeInst, mem_ctx);
|
|
|
|
if (dbg) {
|
|
printf("Optimize: End dead code removal.\n");
|
|
printf(" %u channel writes removed\n", rem);
|
|
printf(" %u instructions removed\n", rem);
|
|
/*_mesa_print_program(prog);*/
|
|
}
|
|
|
|
done:
|
|
free(removeInst);
|
|
return rem != 0;
|
|
}
|
|
|
|
|
|
enum inst_use
|
|
{
|
|
READ,
|
|
WRITE,
|
|
FLOW,
|
|
END
|
|
};
|
|
|
|
|
|
/**
|
|
* Scan forward in program from 'start' for the next occurances of TEMP[index].
|
|
* We look if an instruction reads the component given by the masks and if they
|
|
* are overwritten.
|
|
* Return READ, WRITE, FLOW or END to indicate the next usage or an indicator
|
|
* that we can't look further.
|
|
*/
|
|
static enum inst_use
|
|
find_next_use(const struct gl_program *prog,
|
|
GLuint start,
|
|
GLuint index,
|
|
GLuint mask)
|
|
{
|
|
GLuint i;
|
|
|
|
for (i = start; i < prog->arb.NumInstructions; i++) {
|
|
const struct prog_instruction *inst = prog->arb.Instructions + i;
|
|
switch (inst->Opcode) {
|
|
case OPCODE_BGNLOOP:
|
|
case OPCODE_BGNSUB:
|
|
case OPCODE_CAL:
|
|
case OPCODE_CONT:
|
|
case OPCODE_IF:
|
|
case OPCODE_ELSE:
|
|
case OPCODE_ENDIF:
|
|
case OPCODE_ENDLOOP:
|
|
case OPCODE_ENDSUB:
|
|
case OPCODE_RET:
|
|
return FLOW;
|
|
case OPCODE_END:
|
|
return END;
|
|
default:
|
|
{
|
|
const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
|
|
GLuint j;
|
|
for (j = 0; j < numSrc; j++) {
|
|
if (inst->SrcReg[j].RelAddr ||
|
|
(inst->SrcReg[j].File == PROGRAM_TEMPORARY &&
|
|
inst->SrcReg[j].Index == (GLint)index &&
|
|
(get_src_arg_mask(inst,j,NO_MASK) & mask)))
|
|
return READ;
|
|
}
|
|
if (_mesa_num_inst_dst_regs(inst->Opcode) == 1 &&
|
|
inst->DstReg.File == PROGRAM_TEMPORARY &&
|
|
inst->DstReg.Index == index) {
|
|
mask &= ~inst->DstReg.WriteMask;
|
|
if (mask == 0)
|
|
return WRITE;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return END;
|
|
}
|
|
|
|
|
|
/**
|
|
* Is the given instruction opcode a flow-control opcode?
|
|
* XXX maybe move this into prog_instruction.[ch]
|
|
*/
|
|
static GLboolean
|
|
_mesa_is_flow_control_opcode(enum prog_opcode opcode)
|
|
{
|
|
switch (opcode) {
|
|
case OPCODE_BGNLOOP:
|
|
case OPCODE_BGNSUB:
|
|
case OPCODE_CAL:
|
|
case OPCODE_CONT:
|
|
case OPCODE_IF:
|
|
case OPCODE_ELSE:
|
|
case OPCODE_END:
|
|
case OPCODE_ENDIF:
|
|
case OPCODE_ENDLOOP:
|
|
case OPCODE_ENDSUB:
|
|
case OPCODE_RET:
|
|
return GL_TRUE;
|
|
default:
|
|
return GL_FALSE;
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
* Test if the given instruction is a simple MOV (no conditional updating,
|
|
* not relative addressing, no negation/abs, etc).
|
|
*/
|
|
static GLboolean
|
|
can_downward_mov_be_modifed(const struct prog_instruction *mov)
|
|
{
|
|
return
|
|
mov->Opcode == OPCODE_MOV &&
|
|
mov->SrcReg[0].RelAddr == 0 &&
|
|
mov->SrcReg[0].Negate == 0 &&
|
|
mov->DstReg.RelAddr == 0;
|
|
}
|
|
|
|
|
|
static GLboolean
|
|
can_upward_mov_be_modifed(const struct prog_instruction *mov)
|
|
{
|
|
return
|
|
can_downward_mov_be_modifed(mov) &&
|
|
mov->DstReg.File == PROGRAM_TEMPORARY &&
|
|
!mov->Saturate;
|
|
}
|
|
|
|
|
|
/**
|
|
* Try to remove use of extraneous MOV instructions, to free them up for dead
|
|
* code removal.
|
|
*/
|
|
static void
|
|
_mesa_remove_extra_move_use(struct gl_program *prog)
|
|
{
|
|
GLuint i, j;
|
|
|
|
if (dbg) {
|
|
printf("Optimize: Begin remove extra move use\n");
|
|
_mesa_print_program(prog);
|
|
}
|
|
|
|
/*
|
|
* Look for sequences such as this:
|
|
* MOV tmpX, arg0;
|
|
* ...
|
|
* FOO tmpY, tmpX, arg1;
|
|
* and convert into:
|
|
* MOV tmpX, arg0;
|
|
* ...
|
|
* FOO tmpY, arg0, arg1;
|
|
*/
|
|
|
|
for (i = 0; i + 1 < prog->arb.NumInstructions; i++) {
|
|
const struct prog_instruction *mov = prog->arb.Instructions + i;
|
|
GLuint dst_mask, src_mask;
|
|
if (can_upward_mov_be_modifed(mov) == GL_FALSE)
|
|
continue;
|
|
|
|
/* Scanning the code, we maintain the components which are still active in
|
|
* these two masks
|
|
*/
|
|
dst_mask = mov->DstReg.WriteMask;
|
|
src_mask = get_src_arg_mask(mov, 0, NO_MASK);
|
|
|
|
/* Walk through remaining instructions until the or src reg gets
|
|
* rewritten or we get into some flow-control, eliminating the use of
|
|
* this MOV.
|
|
*/
|
|
for (j = i + 1; j < prog->arb.NumInstructions; j++) {
|
|
struct prog_instruction *inst2 = prog->arb.Instructions + j;
|
|
GLuint arg;
|
|
|
|
if (_mesa_is_flow_control_opcode(inst2->Opcode))
|
|
break;
|
|
|
|
/* First rewrite this instruction's args if appropriate. */
|
|
for (arg = 0; arg < _mesa_num_inst_src_regs(inst2->Opcode); arg++) {
|
|
GLuint comp, read_mask;
|
|
|
|
if (inst2->SrcReg[arg].File != mov->DstReg.File ||
|
|
inst2->SrcReg[arg].Index != mov->DstReg.Index ||
|
|
inst2->SrcReg[arg].RelAddr)
|
|
continue;
|
|
read_mask = get_src_arg_mask(inst2, arg, NO_MASK);
|
|
|
|
/* Adjust the swizzles of inst2 to point at MOV's source if ALL the
|
|
* components read still come from the mov instructions
|
|
*/
|
|
if (is_swizzle_regular(inst2->SrcReg[arg].Swizzle) &&
|
|
(read_mask & dst_mask) == read_mask) {
|
|
for (comp = 0; comp < 4; comp++) {
|
|
const GLuint inst2_swz =
|
|
GET_SWZ(inst2->SrcReg[arg].Swizzle, comp);
|
|
const GLuint s = GET_SWZ(mov->SrcReg[0].Swizzle, inst2_swz);
|
|
inst2->SrcReg[arg].Swizzle &= ~(7 << (3 * comp));
|
|
inst2->SrcReg[arg].Swizzle |= s << (3 * comp);
|
|
inst2->SrcReg[arg].Negate ^= (((mov->SrcReg[0].Negate >>
|
|
inst2_swz) & 0x1) << comp);
|
|
}
|
|
inst2->SrcReg[arg].File = mov->SrcReg[0].File;
|
|
inst2->SrcReg[arg].Index = mov->SrcReg[0].Index;
|
|
}
|
|
}
|
|
|
|
/* The source of MOV is written. This potentially deactivates some
|
|
* components from the src and dst of the MOV instruction
|
|
*/
|
|
if (inst2->DstReg.File == mov->DstReg.File &&
|
|
(inst2->DstReg.RelAddr ||
|
|
inst2->DstReg.Index == mov->DstReg.Index)) {
|
|
dst_mask &= ~inst2->DstReg.WriteMask;
|
|
src_mask = get_src_arg_mask(mov, 0, dst_mask);
|
|
}
|
|
|
|
/* Idem when the destination of mov is written */
|
|
if (inst2->DstReg.File == mov->SrcReg[0].File &&
|
|
(inst2->DstReg.RelAddr ||
|
|
inst2->DstReg.Index == mov->SrcReg[0].Index)) {
|
|
src_mask &= ~inst2->DstReg.WriteMask;
|
|
dst_mask &= get_dst_mask_for_mov(mov, src_mask);
|
|
}
|
|
if (dst_mask == 0)
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (dbg) {
|
|
printf("Optimize: End remove extra move use.\n");
|
|
/*_mesa_print_program(prog);*/
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
* Complements dead_code_global. Try to remove code in block of code by
|
|
* carefully monitoring the swizzles. Both functions should be merged into one
|
|
* with a proper control flow graph
|
|
*/
|
|
static GLboolean
|
|
_mesa_remove_dead_code_local(struct gl_program *prog, void *mem_ctx)
|
|
{
|
|
GLboolean *removeInst;
|
|
GLuint i, arg, rem = 0;
|
|
|
|
removeInst =
|
|
calloc(prog->arb.NumInstructions, sizeof(GLboolean));
|
|
|
|
for (i = 0; i < prog->arb.NumInstructions; i++) {
|
|
const struct prog_instruction *inst = prog->arb.Instructions + i;
|
|
const GLuint index = inst->DstReg.Index;
|
|
const GLuint mask = inst->DstReg.WriteMask;
|
|
enum inst_use use;
|
|
|
|
/* We must deactivate the pass as soon as some indirection is used */
|
|
if (inst->DstReg.RelAddr)
|
|
goto done;
|
|
for (arg = 0; arg < _mesa_num_inst_src_regs(inst->Opcode); arg++)
|
|
if (inst->SrcReg[arg].RelAddr)
|
|
goto done;
|
|
|
|
if (_mesa_is_flow_control_opcode(inst->Opcode) ||
|
|
_mesa_num_inst_dst_regs(inst->Opcode) == 0 ||
|
|
inst->DstReg.File != PROGRAM_TEMPORARY ||
|
|
inst->DstReg.RelAddr)
|
|
continue;
|
|
|
|
use = find_next_use(prog, i+1, index, mask);
|
|
if (use == WRITE || use == END)
|
|
removeInst[i] = GL_TRUE;
|
|
}
|
|
|
|
rem = remove_instructions(prog, removeInst, mem_ctx);
|
|
|
|
done:
|
|
free(removeInst);
|
|
return rem != 0;
|
|
}
|
|
|
|
|
|
/**
|
|
* Try to inject the destination of mov as the destination of inst and recompute
|
|
* the swizzles operators for the sources of inst if required. Return GL_TRUE
|
|
* of the substitution was possible, GL_FALSE otherwise
|
|
*/
|
|
static GLboolean
|
|
_mesa_merge_mov_into_inst(struct prog_instruction *inst,
|
|
const struct prog_instruction *mov)
|
|
{
|
|
/* Indirection table which associates destination and source components for
|
|
* the mov instruction
|
|
*/
|
|
const GLuint mask = get_src_arg_mask(mov, 0, NO_MASK);
|
|
|
|
/* Some components are not written by inst. We cannot remove the mov */
|
|
if (mask != (inst->DstReg.WriteMask & mask))
|
|
return GL_FALSE;
|
|
|
|
inst->Saturate |= mov->Saturate;
|
|
|
|
/* Depending on the instruction, we may need to recompute the swizzles.
|
|
* Also, some other instructions (like TEX) are not linear. We will only
|
|
* consider completely active sources and destinations
|
|
*/
|
|
switch (inst->Opcode) {
|
|
|
|
/* Carstesian instructions: we compute the swizzle */
|
|
case OPCODE_MOV:
|
|
case OPCODE_MIN:
|
|
case OPCODE_MAX:
|
|
case OPCODE_ABS:
|
|
case OPCODE_ADD:
|
|
case OPCODE_MAD:
|
|
case OPCODE_MUL:
|
|
case OPCODE_SUB:
|
|
{
|
|
GLuint dst_to_src_comp[4] = {0,0,0,0};
|
|
GLuint dst_comp, arg;
|
|
for (dst_comp = 0; dst_comp < 4; ++dst_comp) {
|
|
if (mov->DstReg.WriteMask & (1 << dst_comp)) {
|
|
const GLuint src_comp = GET_SWZ(mov->SrcReg[0].Swizzle, dst_comp);
|
|
assert(src_comp < 4);
|
|
dst_to_src_comp[dst_comp] = src_comp;
|
|
}
|
|
}
|
|
|
|
/* Patch each source of the instruction */
|
|
for (arg = 0; arg < _mesa_num_inst_src_regs(inst->Opcode); arg++) {
|
|
const GLuint arg_swz = inst->SrcReg[arg].Swizzle;
|
|
inst->SrcReg[arg].Swizzle = 0;
|
|
|
|
/* Reset each active component of the swizzle */
|
|
for (dst_comp = 0; dst_comp < 4; ++dst_comp) {
|
|
GLuint src_comp, arg_comp;
|
|
if ((mov->DstReg.WriteMask & (1 << dst_comp)) == 0)
|
|
continue;
|
|
src_comp = dst_to_src_comp[dst_comp];
|
|
assert(src_comp < 4);
|
|
arg_comp = GET_SWZ(arg_swz, src_comp);
|
|
assert(arg_comp < 4);
|
|
inst->SrcReg[arg].Swizzle |= arg_comp << (3*dst_comp);
|
|
}
|
|
}
|
|
inst->DstReg = mov->DstReg;
|
|
return GL_TRUE;
|
|
}
|
|
|
|
/* Dot products and scalar instructions: we only change the destination */
|
|
case OPCODE_RCP:
|
|
case OPCODE_SIN:
|
|
case OPCODE_COS:
|
|
case OPCODE_RSQ:
|
|
case OPCODE_POW:
|
|
case OPCODE_EX2:
|
|
case OPCODE_LOG:
|
|
case OPCODE_DP2:
|
|
case OPCODE_DP3:
|
|
case OPCODE_DP4:
|
|
inst->DstReg = mov->DstReg;
|
|
return GL_TRUE;
|
|
|
|
/* All other instructions require fully active components with no swizzle */
|
|
default:
|
|
if (mov->SrcReg[0].Swizzle != SWIZZLE_XYZW ||
|
|
inst->DstReg.WriteMask != WRITEMASK_XYZW)
|
|
return GL_FALSE;
|
|
inst->DstReg = mov->DstReg;
|
|
return GL_TRUE;
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
* Try to remove extraneous MOV instructions from the given program.
|
|
*/
|
|
static GLboolean
|
|
_mesa_remove_extra_moves(struct gl_program *prog, void *mem_ctx)
|
|
{
|
|
GLboolean *removeInst; /* per-instruction removal flag */
|
|
GLuint i, rem = 0, nesting = 0;
|
|
|
|
if (dbg) {
|
|
printf("Optimize: Begin remove extra moves\n");
|
|
_mesa_print_program(prog);
|
|
}
|
|
|
|
removeInst =
|
|
calloc(prog->arb.NumInstructions, sizeof(GLboolean));
|
|
|
|
/*
|
|
* Look for sequences such as this:
|
|
* FOO tmpX, arg0, arg1;
|
|
* MOV tmpY, tmpX;
|
|
* and convert into:
|
|
* FOO tmpY, arg0, arg1;
|
|
*/
|
|
|
|
for (i = 0; i < prog->arb.NumInstructions; i++) {
|
|
const struct prog_instruction *mov = prog->arb.Instructions + i;
|
|
|
|
switch (mov->Opcode) {
|
|
case OPCODE_BGNLOOP:
|
|
case OPCODE_BGNSUB:
|
|
case OPCODE_IF:
|
|
nesting++;
|
|
break;
|
|
case OPCODE_ENDLOOP:
|
|
case OPCODE_ENDSUB:
|
|
case OPCODE_ENDIF:
|
|
nesting--;
|
|
break;
|
|
case OPCODE_MOV:
|
|
if (i > 0 &&
|
|
can_downward_mov_be_modifed(mov) &&
|
|
mov->SrcReg[0].File == PROGRAM_TEMPORARY &&
|
|
nesting == 0)
|
|
{
|
|
|
|
/* see if this MOV can be removed */
|
|
const GLuint id = mov->SrcReg[0].Index;
|
|
struct prog_instruction *prevInst;
|
|
GLuint prevI;
|
|
|
|
/* get pointer to previous instruction */
|
|
prevI = i - 1;
|
|
while (prevI > 0 && removeInst[prevI])
|
|
prevI--;
|
|
prevInst = prog->arb.Instructions + prevI;
|
|
|
|
if (prevInst->DstReg.File == PROGRAM_TEMPORARY &&
|
|
prevInst->DstReg.Index == id &&
|
|
prevInst->DstReg.RelAddr == 0) {
|
|
|
|
const GLuint dst_mask = prevInst->DstReg.WriteMask;
|
|
enum inst_use next_use = find_next_use(prog, i+1, id, dst_mask);
|
|
|
|
if (next_use == WRITE || next_use == END) {
|
|
/* OK, we can safely remove this MOV instruction.
|
|
* Transform:
|
|
* prevI: FOO tempIndex, x, y;
|
|
* i: MOV z, tempIndex;
|
|
* Into:
|
|
* prevI: FOO z, x, y;
|
|
*/
|
|
if (_mesa_merge_mov_into_inst(prevInst, mov)) {
|
|
removeInst[i] = GL_TRUE;
|
|
if (dbg) {
|
|
printf("Remove MOV at %u\n", i);
|
|
printf("new prev inst %u: ", prevI);
|
|
_mesa_print_instruction(prevInst);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
default:
|
|
; /* nothing */
|
|
}
|
|
}
|
|
|
|
/* now remove the instructions which aren't needed */
|
|
rem = remove_instructions(prog, removeInst, mem_ctx);
|
|
|
|
free(removeInst);
|
|
|
|
if (dbg) {
|
|
printf("Optimize: End remove extra moves. %u instructions removed\n", rem);
|
|
/*_mesa_print_program(prog);*/
|
|
}
|
|
|
|
return rem != 0;
|
|
}
|
|
|
|
|
|
/** A live register interval */
|
|
struct interval
|
|
{
|
|
GLuint Reg; /** The temporary register index */
|
|
GLuint Start, End; /** Start/end instruction numbers */
|
|
};
|
|
|
|
|
|
/** A list of register intervals */
|
|
struct interval_list
|
|
{
|
|
GLuint Num;
|
|
struct interval Intervals[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
|
|
};
|
|
|
|
|
|
static void
|
|
append_interval(struct interval_list *list, const struct interval *inv)
|
|
{
|
|
list->Intervals[list->Num++] = *inv;
|
|
}
|
|
|
|
|
|
/** Insert interval inv into list, sorted by interval end */
|
|
static void
|
|
insert_interval_by_end(struct interval_list *list, const struct interval *inv)
|
|
{
|
|
/* XXX we could do a binary search insertion here since list is sorted */
|
|
GLint i = list->Num - 1;
|
|
while (i >= 0 && list->Intervals[i].End > inv->End) {
|
|
list->Intervals[i + 1] = list->Intervals[i];
|
|
i--;
|
|
}
|
|
list->Intervals[i + 1] = *inv;
|
|
list->Num++;
|
|
|
|
#ifdef DEBUG
|
|
{
|
|
GLuint i;
|
|
for (i = 0; i + 1 < list->Num; i++) {
|
|
assert(list->Intervals[i].End <= list->Intervals[i + 1].End);
|
|
}
|
|
}
|
|
#endif
|
|
}
|
|
|
|
|
|
/** Remove the given interval from the interval list */
|
|
static void
|
|
remove_interval(struct interval_list *list, const struct interval *inv)
|
|
{
|
|
/* XXX we could binary search since list is sorted */
|
|
GLuint k;
|
|
for (k = 0; k < list->Num; k++) {
|
|
if (list->Intervals[k].Reg == inv->Reg) {
|
|
/* found, remove it */
|
|
assert(list->Intervals[k].Start == inv->Start);
|
|
assert(list->Intervals[k].End == inv->End);
|
|
while (k < list->Num - 1) {
|
|
list->Intervals[k] = list->Intervals[k + 1];
|
|
k++;
|
|
}
|
|
list->Num--;
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/** called by qsort() */
|
|
static int
|
|
compare_start(const void *a, const void *b)
|
|
{
|
|
const struct interval *ia = (const struct interval *) a;
|
|
const struct interval *ib = (const struct interval *) b;
|
|
if (ia->Start < ib->Start)
|
|
return -1;
|
|
else if (ia->Start > ib->Start)
|
|
return +1;
|
|
else
|
|
return 0;
|
|
}
|
|
|
|
|
|
/** sort the interval list according to interval starts */
|
|
static void
|
|
sort_interval_list_by_start(struct interval_list *list)
|
|
{
|
|
qsort(list->Intervals, list->Num, sizeof(struct interval), compare_start);
|
|
#ifdef DEBUG
|
|
{
|
|
GLuint i;
|
|
for (i = 0; i + 1 < list->Num; i++) {
|
|
assert(list->Intervals[i].Start <= list->Intervals[i + 1].Start);
|
|
}
|
|
}
|
|
#endif
|
|
}
|
|
|
|
struct loop_info
|
|
{
|
|
GLuint Start, End; /**< Start, end instructions of loop */
|
|
};
|
|
|
|
/**
|
|
* Update the intermediate interval info for register 'index' and
|
|
* instruction 'ic'.
|
|
*/
|
|
static void
|
|
update_interval(GLint intBegin[], GLint intEnd[],
|
|
struct loop_info *loopStack, GLuint loopStackDepth,
|
|
GLuint index, GLuint ic)
|
|
{
|
|
unsigned i;
|
|
GLuint begin = ic;
|
|
GLuint end = ic;
|
|
|
|
/* If the register is used in a loop, extend its lifetime through the end
|
|
* of the outermost loop that doesn't contain its definition.
|
|
*/
|
|
for (i = 0; i < loopStackDepth; i++) {
|
|
if (intBegin[index] < loopStack[i].Start) {
|
|
end = loopStack[i].End;
|
|
break;
|
|
}
|
|
}
|
|
|
|
/* Variables that are live at the end of a loop will also be live at the
|
|
* beginning, so an instruction inside of a loop should have its live
|
|
* interval begin at the start of the outermost loop.
|
|
*/
|
|
if (loopStackDepth > 0 && ic > loopStack[0].Start && ic < loopStack[0].End) {
|
|
begin = loopStack[0].Start;
|
|
}
|
|
|
|
assert(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
|
|
if (intBegin[index] == -1) {
|
|
assert(intEnd[index] == -1);
|
|
intBegin[index] = begin;
|
|
intEnd[index] = end;
|
|
}
|
|
else {
|
|
intEnd[index] = end;
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
* Find first/last instruction that references each temporary register.
|
|
*/
|
|
static bool
|
|
find_temp_intervals(const struct prog_instruction *instructions,
|
|
GLuint numInstructions,
|
|
GLint intBegin[REG_ALLOCATE_MAX_PROGRAM_TEMPS],
|
|
GLint intEnd[REG_ALLOCATE_MAX_PROGRAM_TEMPS])
|
|
{
|
|
struct loop_info loopStack[MAX_LOOP_NESTING];
|
|
GLuint loopStackDepth = 0;
|
|
GLuint i;
|
|
|
|
for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++){
|
|
intBegin[i] = intEnd[i] = -1;
|
|
}
|
|
|
|
/* Scan instructions looking for temporary registers */
|
|
for (i = 0; i < numInstructions; i++) {
|
|
const struct prog_instruction *inst = instructions + i;
|
|
if (inst->Opcode == OPCODE_BGNLOOP) {
|
|
loopStack[loopStackDepth].Start = i;
|
|
loopStack[loopStackDepth].End = inst->BranchTarget;
|
|
loopStackDepth++;
|
|
}
|
|
else if (inst->Opcode == OPCODE_ENDLOOP) {
|
|
loopStackDepth--;
|
|
}
|
|
else if (inst->Opcode == OPCODE_CAL) {
|
|
return GL_FALSE;
|
|
}
|
|
else {
|
|
const GLuint numSrc = 3;/*_mesa_num_inst_src_regs(inst->Opcode);*/
|
|
GLuint j;
|
|
for (j = 0; j < numSrc; j++) {
|
|
if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
|
|
const GLuint index = inst->SrcReg[j].Index;
|
|
if (inst->SrcReg[j].RelAddr)
|
|
return GL_FALSE;
|
|
update_interval(intBegin, intEnd, loopStack, loopStackDepth,
|
|
index, i);
|
|
}
|
|
}
|
|
if (inst->DstReg.File == PROGRAM_TEMPORARY) {
|
|
const GLuint index = inst->DstReg.Index;
|
|
if (inst->DstReg.RelAddr)
|
|
return GL_FALSE;
|
|
update_interval(intBegin, intEnd, loopStack, loopStackDepth,
|
|
index, i);
|
|
}
|
|
}
|
|
}
|
|
|
|
return GL_TRUE;
|
|
}
|
|
|
|
|
|
/**
|
|
* Find the live intervals for each temporary register in the program.
|
|
* For register R, the interval [A,B] indicates that R is referenced
|
|
* from instruction A through instruction B.
|
|
* Special consideration is needed for loops and subroutines.
|
|
* \return GL_TRUE if success, GL_FALSE if we cannot proceed for some reason
|
|
*/
|
|
static GLboolean
|
|
find_live_intervals(struct gl_program *prog,
|
|
struct interval_list *liveIntervals)
|
|
{
|
|
GLint intBegin[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
|
|
GLint intEnd[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
|
|
GLuint i;
|
|
|
|
/*
|
|
* Note: we'll return GL_FALSE below if we find relative indexing
|
|
* into the TEMP register file. We can't handle that yet.
|
|
* We also give up on subroutines for now.
|
|
*/
|
|
|
|
if (dbg) {
|
|
printf("Optimize: Begin find intervals\n");
|
|
}
|
|
|
|
/* build intermediate arrays */
|
|
if (!find_temp_intervals(prog->arb.Instructions, prog->arb.NumInstructions,
|
|
intBegin, intEnd))
|
|
return GL_FALSE;
|
|
|
|
/* Build live intervals list from intermediate arrays */
|
|
liveIntervals->Num = 0;
|
|
for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++) {
|
|
if (intBegin[i] >= 0) {
|
|
struct interval inv;
|
|
inv.Reg = i;
|
|
inv.Start = intBegin[i];
|
|
inv.End = intEnd[i];
|
|
append_interval(liveIntervals, &inv);
|
|
}
|
|
}
|
|
|
|
/* Sort the list according to interval starts */
|
|
sort_interval_list_by_start(liveIntervals);
|
|
|
|
if (dbg) {
|
|
/* print interval info */
|
|
for (i = 0; i < liveIntervals->Num; i++) {
|
|
const struct interval *inv = liveIntervals->Intervals + i;
|
|
printf("Reg[%d] live [%d, %d]:",
|
|
inv->Reg, inv->Start, inv->End);
|
|
if (1) {
|
|
GLuint j;
|
|
for (j = 0; j < inv->Start; j++)
|
|
printf(" ");
|
|
for (j = inv->Start; j <= inv->End; j++)
|
|
printf("x");
|
|
}
|
|
printf("\n");
|
|
}
|
|
}
|
|
|
|
return GL_TRUE;
|
|
}
|
|
|
|
|
|
/** Scan the array of used register flags to find free entry */
|
|
static GLint
|
|
alloc_register(GLboolean usedRegs[REG_ALLOCATE_MAX_PROGRAM_TEMPS])
|
|
{
|
|
GLuint k;
|
|
for (k = 0; k < REG_ALLOCATE_MAX_PROGRAM_TEMPS; k++) {
|
|
if (!usedRegs[k]) {
|
|
usedRegs[k] = GL_TRUE;
|
|
return k;
|
|
}
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
|
|
/**
|
|
* This function implements "Linear Scan Register Allocation" to reduce
|
|
* the number of temporary registers used by the program.
|
|
*
|
|
* We compute the "live interval" for all temporary registers then
|
|
* examine the overlap of the intervals to allocate new registers.
|
|
* Basically, if two intervals do not overlap, they can use the same register.
|
|
*/
|
|
static void
|
|
_mesa_reallocate_registers(struct gl_program *prog)
|
|
{
|
|
struct interval_list liveIntervals;
|
|
GLint registerMap[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
|
|
GLboolean usedRegs[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
|
|
GLuint i;
|
|
GLint maxTemp = -1;
|
|
|
|
if (dbg) {
|
|
printf("Optimize: Begin live-interval register reallocation\n");
|
|
_mesa_print_program(prog);
|
|
}
|
|
|
|
for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++){
|
|
registerMap[i] = -1;
|
|
usedRegs[i] = GL_FALSE;
|
|
}
|
|
|
|
if (!find_live_intervals(prog, &liveIntervals)) {
|
|
if (dbg)
|
|
printf("Aborting register reallocation\n");
|
|
return;
|
|
}
|
|
|
|
{
|
|
struct interval_list activeIntervals;
|
|
activeIntervals.Num = 0;
|
|
|
|
/* loop over live intervals, allocating a new register for each */
|
|
for (i = 0; i < liveIntervals.Num; i++) {
|
|
const struct interval *live = liveIntervals.Intervals + i;
|
|
|
|
if (dbg)
|
|
printf("Consider register %u\n", live->Reg);
|
|
|
|
/* Expire old intervals. Intervals which have ended with respect
|
|
* to the live interval can have their remapped registers freed.
|
|
*/
|
|
{
|
|
GLint j;
|
|
for (j = 0; j < (GLint) activeIntervals.Num; j++) {
|
|
const struct interval *inv = activeIntervals.Intervals + j;
|
|
if (inv->End >= live->Start) {
|
|
/* Stop now. Since the activeInterval list is sorted
|
|
* we know we don't have to go further.
|
|
*/
|
|
break;
|
|
}
|
|
else {
|
|
/* Interval 'inv' has expired */
|
|
const GLint regNew = registerMap[inv->Reg];
|
|
assert(regNew >= 0);
|
|
|
|
if (dbg)
|
|
printf(" expire interval for reg %u\n", inv->Reg);
|
|
|
|
/* remove interval j from active list */
|
|
remove_interval(&activeIntervals, inv);
|
|
j--; /* counter-act j++ in for-loop above */
|
|
|
|
/* return register regNew to the free pool */
|
|
if (dbg)
|
|
printf(" free reg %d\n", regNew);
|
|
assert(usedRegs[regNew] == GL_TRUE);
|
|
usedRegs[regNew] = GL_FALSE;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* find a free register for this live interval */
|
|
{
|
|
const GLint k = alloc_register(usedRegs);
|
|
if (k < 0) {
|
|
/* out of registers, give up */
|
|
return;
|
|
}
|
|
registerMap[live->Reg] = k;
|
|
maxTemp = MAX2(maxTemp, k);
|
|
if (dbg)
|
|
printf(" remap register %u -> %d\n", live->Reg, k);
|
|
}
|
|
|
|
/* Insert this live interval into the active list which is sorted
|
|
* by increasing end points.
|
|
*/
|
|
insert_interval_by_end(&activeIntervals, live);
|
|
}
|
|
}
|
|
|
|
if (maxTemp + 1 < (GLint) liveIntervals.Num) {
|
|
/* OK, we've reduced the number of registers needed.
|
|
* Scan the program and replace all the old temporary register
|
|
* indexes with the new indexes.
|
|
*/
|
|
replace_regs(prog, PROGRAM_TEMPORARY, registerMap);
|
|
|
|
prog->arb.NumTemporaries = maxTemp + 1;
|
|
}
|
|
|
|
if (dbg) {
|
|
printf("Optimize: End live-interval register reallocation\n");
|
|
printf("Num temp regs before: %u after: %u\n",
|
|
liveIntervals.Num, maxTemp + 1);
|
|
_mesa_print_program(prog);
|
|
}
|
|
}
|
|
|
|
|
|
#if 0
|
|
static void
|
|
print_it(struct gl_context *ctx, struct gl_program *program, const char *txt) {
|
|
fprintf(stderr, "%s (%u inst):\n", txt, program->arb.NumInstructions);
|
|
_mesa_print_program(program);
|
|
_mesa_print_program_parameters(ctx, program);
|
|
fprintf(stderr, "\n\n");
|
|
}
|
|
#endif
|
|
|
|
/**
|
|
* This pass replaces CMP T0, T1 T2 T0 with MOV T0, T2 when the CMP
|
|
* instruction is the first instruction to write to register T0. The are
|
|
* several lowering passes done in GLSL IR (e.g. branches and
|
|
* relative addressing) that create a large number of conditional assignments
|
|
* that ir_to_mesa converts to CMP instructions like the one mentioned above.
|
|
*
|
|
* Here is why this conversion is safe:
|
|
* CMP T0, T1 T2 T0 can be expanded to:
|
|
* if (T1 < 0.0)
|
|
* MOV T0, T2;
|
|
* else
|
|
* MOV T0, T0;
|
|
*
|
|
* If (T1 < 0.0) evaluates to true then our replacement MOV T0, T2 is the same
|
|
* as the original program. If (T1 < 0.0) evaluates to false, executing
|
|
* MOV T0, T0 will store a garbage value in T0 since T0 is uninitialized.
|
|
* Therefore, it doesn't matter that we are replacing MOV T0, T0 with MOV T0, T2
|
|
* because any instruction that was going to read from T0 after this was going
|
|
* to read a garbage value anyway.
|
|
*/
|
|
static void
|
|
_mesa_simplify_cmp(struct gl_program * program)
|
|
{
|
|
GLuint tempWrites[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
|
|
GLuint outputWrites[MAX_PROGRAM_OUTPUTS];
|
|
GLuint i;
|
|
|
|
if (dbg) {
|
|
printf("Optimize: Begin reads without writes\n");
|
|
_mesa_print_program(program);
|
|
}
|
|
|
|
for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++) {
|
|
tempWrites[i] = 0;
|
|
}
|
|
|
|
for (i = 0; i < MAX_PROGRAM_OUTPUTS; i++) {
|
|
outputWrites[i] = 0;
|
|
}
|
|
|
|
for (i = 0; i < program->arb.NumInstructions; i++) {
|
|
struct prog_instruction *inst = program->arb.Instructions + i;
|
|
GLuint prevWriteMask;
|
|
|
|
/* Give up if we encounter relative addressing or flow control. */
|
|
if (_mesa_is_flow_control_opcode(inst->Opcode) || inst->DstReg.RelAddr) {
|
|
return;
|
|
}
|
|
|
|
if (inst->DstReg.File == PROGRAM_OUTPUT) {
|
|
assert(inst->DstReg.Index < MAX_PROGRAM_OUTPUTS);
|
|
prevWriteMask = outputWrites[inst->DstReg.Index];
|
|
outputWrites[inst->DstReg.Index] |= inst->DstReg.WriteMask;
|
|
} else if (inst->DstReg.File == PROGRAM_TEMPORARY) {
|
|
assert(inst->DstReg.Index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
|
|
prevWriteMask = tempWrites[inst->DstReg.Index];
|
|
tempWrites[inst->DstReg.Index] |= inst->DstReg.WriteMask;
|
|
} else {
|
|
/* No other register type can be a destination register. */
|
|
continue;
|
|
}
|
|
|
|
/* For a CMP to be considered a conditional write, the destination
|
|
* register and source register two must be the same. */
|
|
if (inst->Opcode == OPCODE_CMP
|
|
&& !(inst->DstReg.WriteMask & prevWriteMask)
|
|
&& inst->SrcReg[2].File == inst->DstReg.File
|
|
&& inst->SrcReg[2].Index == inst->DstReg.Index
|
|
&& inst->DstReg.WriteMask == get_src_arg_mask(inst, 2, NO_MASK)) {
|
|
|
|
inst->Opcode = OPCODE_MOV;
|
|
inst->SrcReg[0] = inst->SrcReg[1];
|
|
|
|
/* Unused operands are expected to have the file set to
|
|
* PROGRAM_UNDEFINED. This is how _mesa_init_instructions initializes
|
|
* all of the sources.
|
|
*/
|
|
inst->SrcReg[1].File = PROGRAM_UNDEFINED;
|
|
inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
|
|
inst->SrcReg[2].File = PROGRAM_UNDEFINED;
|
|
inst->SrcReg[2].Swizzle = SWIZZLE_NOOP;
|
|
}
|
|
}
|
|
if (dbg) {
|
|
printf("Optimize: End reads without writes\n");
|
|
_mesa_print_program(program);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Apply optimizations to the given program to eliminate unnecessary
|
|
* instructions, temp regs, etc.
|
|
*/
|
|
void
|
|
_mesa_optimize_program(struct gl_program *program, void *mem_ctx)
|
|
{
|
|
GLboolean any_change;
|
|
|
|
_mesa_simplify_cmp(program);
|
|
/* Stop when no modifications were output */
|
|
do {
|
|
any_change = GL_FALSE;
|
|
_mesa_remove_extra_move_use(program);
|
|
if (_mesa_remove_dead_code_global(program, mem_ctx))
|
|
any_change = GL_TRUE;
|
|
if (_mesa_remove_extra_moves(program, mem_ctx))
|
|
any_change = GL_TRUE;
|
|
if (_mesa_remove_dead_code_local(program, mem_ctx))
|
|
any_change = GL_TRUE;
|
|
|
|
any_change = _mesa_constant_fold(program) || any_change;
|
|
_mesa_reallocate_registers(program);
|
|
} while (any_change);
|
|
}
|
|
|