mesa/arbprog: Use nir_lower_io_to_temporaries.
This replaces our _mesa_remove_output_reads(), which in turn lets us garbage-collect some other ARB program transformation code that only it used.
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17528>
This commit is contained in:
parent
153f7b8852
commit
c13dbf6ae9
|
@ -340,55 +340,6 @@ _mesa_reference_program_(struct gl_context *ctx,
|
|||
}
|
||||
|
||||
|
||||
/**
|
||||
* Insert 'count' NOP instructions at 'start' in the given program.
|
||||
* Adjust branch targets accordingly.
|
||||
*/
|
||||
GLboolean
|
||||
_mesa_insert_instructions(struct gl_program *prog, GLuint start, GLuint count)
|
||||
{
|
||||
const GLuint origLen = prog->arb.NumInstructions;
|
||||
const GLuint newLen = origLen + count;
|
||||
struct prog_instruction *newInst;
|
||||
GLuint i;
|
||||
|
||||
/* adjust branches */
|
||||
for (i = 0; i < prog->arb.NumInstructions; i++) {
|
||||
struct prog_instruction *inst = prog->arb.Instructions + i;
|
||||
if (inst->BranchTarget > 0) {
|
||||
if ((GLuint)inst->BranchTarget >= start) {
|
||||
inst->BranchTarget += count;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Alloc storage for new instructions */
|
||||
newInst = rzalloc_array(prog, struct prog_instruction, newLen);
|
||||
if (!newInst) {
|
||||
return GL_FALSE;
|
||||
}
|
||||
|
||||
/* Copy 'start' instructions into new instruction buffer */
|
||||
_mesa_copy_instructions(newInst, prog->arb.Instructions, start);
|
||||
|
||||
/* init the new instructions */
|
||||
_mesa_init_instructions(newInst + start, count);
|
||||
|
||||
/* Copy the remaining/tail instructions to new inst buffer */
|
||||
_mesa_copy_instructions(newInst + start + count,
|
||||
prog->arb.Instructions + start,
|
||||
origLen - start);
|
||||
|
||||
/* free old instructions */
|
||||
ralloc_free(prog->arb.Instructions);
|
||||
|
||||
/* install new instructions */
|
||||
prog->arb.Instructions = newInst;
|
||||
prog->arb.NumInstructions = newLen;
|
||||
|
||||
return GL_TRUE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Delete 'count' instructions at 'start' in the given program.
|
||||
* Adjust branch targets accordingly.
|
||||
|
@ -437,68 +388,6 @@ _mesa_delete_instructions(struct gl_program *prog, GLuint start, GLuint count,
|
|||
}
|
||||
|
||||
|
||||
/**
|
||||
* Populate the 'used' array with flags indicating which registers (TEMPs,
|
||||
* INPUTs, OUTPUTs, etc, are used by the given program.
|
||||
* \param file type of register to scan for
|
||||
* \param used returns true/false flags for in use / free
|
||||
* \param usedSize size of the 'used' array
|
||||
*/
|
||||
void
|
||||
_mesa_find_used_registers(const struct gl_program *prog,
|
||||
gl_register_file file,
|
||||
GLboolean used[], GLuint usedSize)
|
||||
{
|
||||
GLuint i, j;
|
||||
|
||||
memset(used, 0, usedSize);
|
||||
|
||||
for (i = 0; i < prog->arb.NumInstructions; i++) {
|
||||
const struct prog_instruction *inst = prog->arb.Instructions + i;
|
||||
const GLuint n = _mesa_num_inst_src_regs(inst->Opcode);
|
||||
|
||||
if (inst->DstReg.File == file) {
|
||||
assert(inst->DstReg.Index < usedSize);
|
||||
if(inst->DstReg.Index < usedSize)
|
||||
used[inst->DstReg.Index] = GL_TRUE;
|
||||
}
|
||||
|
||||
for (j = 0; j < n; j++) {
|
||||
if (inst->SrcReg[j].File == file) {
|
||||
assert(inst->SrcReg[j].Index < (GLint) usedSize);
|
||||
if (inst->SrcReg[j].Index < (GLint) usedSize)
|
||||
used[inst->SrcReg[j].Index] = GL_TRUE;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Scan the given 'used' register flag array for the first entry
|
||||
* that's >= firstReg.
|
||||
* \param used vector of flags indicating registers in use (as returned
|
||||
* by _mesa_find_used_registers())
|
||||
* \param usedSize size of the 'used' array
|
||||
* \param firstReg first register to start searching at
|
||||
* \return index of unused register, or -1 if none.
|
||||
*/
|
||||
GLint
|
||||
_mesa_find_free_register(const GLboolean used[],
|
||||
GLuint usedSize, GLuint firstReg)
|
||||
{
|
||||
GLuint i;
|
||||
|
||||
assert(firstReg < usedSize);
|
||||
|
||||
for (i = firstReg; i < usedSize; i++)
|
||||
if (!used[i])
|
||||
return i;
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
/* Gets the minimum number of shader invocations per fragment.
|
||||
* This function is useful to determine if we need to do per
|
||||
* sample shading or per fragment shading.
|
||||
|
|
|
@ -90,22 +90,10 @@ _mesa_reference_program(struct gl_context *ctx,
|
|||
_mesa_reference_program_(ctx, ptr, prog);
|
||||
}
|
||||
|
||||
extern GLboolean
|
||||
_mesa_insert_instructions(struct gl_program *prog, GLuint start, GLuint count);
|
||||
|
||||
extern GLboolean
|
||||
_mesa_delete_instructions(struct gl_program *prog, GLuint start, GLuint count,
|
||||
void *mem_ctx);
|
||||
|
||||
extern void
|
||||
_mesa_find_used_registers(const struct gl_program *prog,
|
||||
gl_register_file file,
|
||||
GLboolean used[], GLuint usedSize);
|
||||
|
||||
extern GLint
|
||||
_mesa_find_free_register(const GLboolean used[],
|
||||
GLuint maxRegs, GLuint firstReg);
|
||||
|
||||
extern GLint
|
||||
_mesa_get_min_invocations_per_fragment(struct gl_context *ctx,
|
||||
const struct gl_program *prog);
|
||||
|
|
|
@ -411,98 +411,3 @@ _mesa_append_fog_code(struct gl_context *ctx, struct gl_program *fprog,
|
|||
fprog->info.inputs_read |= VARYING_BIT_FOGC;
|
||||
assert(fprog->info.outputs_written & (1 << FRAG_RESULT_COLOR));
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Scan/rewrite program to remove reads of custom (output) registers.
|
||||
* The passed type has to be PROGRAM_OUTPUT.
|
||||
* On some hardware, trying to read an output register causes trouble.
|
||||
* So, rewrite the program to use a temporary register in this case.
|
||||
*/
|
||||
void
|
||||
_mesa_remove_output_reads(struct gl_program *prog, gl_register_file type)
|
||||
{
|
||||
GLuint i;
|
||||
GLint outputMap[VARYING_SLOT_MAX];
|
||||
GLuint numVaryingReads = 0;
|
||||
GLboolean usedTemps[MAX_PROGRAM_TEMPS];
|
||||
GLuint firstTemp = 0;
|
||||
|
||||
_mesa_find_used_registers(prog, PROGRAM_TEMPORARY,
|
||||
usedTemps, MAX_PROGRAM_TEMPS);
|
||||
|
||||
assert(type == PROGRAM_OUTPUT);
|
||||
|
||||
for (i = 0; i < VARYING_SLOT_MAX; i++)
|
||||
outputMap[i] = -1;
|
||||
|
||||
/* look for instructions which read from varying vars */
|
||||
for (i = 0; i < prog->arb.NumInstructions; i++) {
|
||||
struct prog_instruction *inst = prog->arb.Instructions + i;
|
||||
const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
|
||||
GLuint j;
|
||||
for (j = 0; j < numSrc; j++) {
|
||||
if (inst->SrcReg[j].File == type) {
|
||||
/* replace the read with a temp reg */
|
||||
const GLuint var = inst->SrcReg[j].Index;
|
||||
if (outputMap[var] == -1) {
|
||||
numVaryingReads++;
|
||||
outputMap[var] = _mesa_find_free_register(usedTemps,
|
||||
MAX_PROGRAM_TEMPS,
|
||||
firstTemp);
|
||||
firstTemp = outputMap[var] + 1;
|
||||
}
|
||||
inst->SrcReg[j].File = PROGRAM_TEMPORARY;
|
||||
inst->SrcReg[j].Index = outputMap[var];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (numVaryingReads == 0)
|
||||
return; /* nothing to be done */
|
||||
|
||||
/* look for instructions which write to the varying vars identified above */
|
||||
for (i = 0; i < prog->arb.NumInstructions; i++) {
|
||||
struct prog_instruction *inst = prog->arb.Instructions + i;
|
||||
if (inst->DstReg.File == type &&
|
||||
outputMap[inst->DstReg.Index] >= 0) {
|
||||
/* change inst to write to the temp reg, instead of the varying */
|
||||
inst->DstReg.File = PROGRAM_TEMPORARY;
|
||||
inst->DstReg.Index = outputMap[inst->DstReg.Index];
|
||||
}
|
||||
}
|
||||
|
||||
/* insert new instructions to copy the temp vars to the varying vars */
|
||||
{
|
||||
struct prog_instruction *inst;
|
||||
GLint endPos, var;
|
||||
|
||||
/* Look for END instruction and insert the new varying writes */
|
||||
endPos = -1;
|
||||
for (i = 0; i < prog->arb.NumInstructions; i++) {
|
||||
struct prog_instruction *inst = prog->arb.Instructions + i;
|
||||
if (inst->Opcode == OPCODE_END) {
|
||||
endPos = i;
|
||||
_mesa_insert_instructions(prog, i, numVaryingReads);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
assert(endPos >= 0);
|
||||
|
||||
/* insert new MOV instructions here */
|
||||
inst = prog->arb.Instructions + endPos;
|
||||
for (var = 0; var < VARYING_SLOT_MAX; var++) {
|
||||
if (outputMap[var] >= 0) {
|
||||
/* MOV VAR[var], TEMP[tmp]; */
|
||||
inst->Opcode = OPCODE_MOV;
|
||||
inst->DstReg.File = type;
|
||||
inst->DstReg.Index = var;
|
||||
inst->SrcReg[0].File = PROGRAM_TEMPORARY;
|
||||
inst->SrcReg[0].Index = outputMap[var];
|
||||
inst++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -44,9 +44,6 @@ extern void
|
|||
_mesa_append_fog_code(struct gl_context *ctx, struct gl_program *fprog,
|
||||
GLenum fog_mode, GLboolean saturate);
|
||||
|
||||
extern void
|
||||
_mesa_remove_output_reads(struct gl_program *prog, gl_register_file type);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -379,6 +379,14 @@ st_prog_to_nir_postprocess(struct st_context *st, nir_shader *nir,
|
|||
NIR_PASS_V(nir, nir_lower_regs_to_ssa);
|
||||
nir_validate_shader(nir, "after st/ptn lower_regs_to_ssa");
|
||||
|
||||
/* Lower outputs to temporaries to avoid reading from output variables (which
|
||||
* is permitted by the language but generally not implemented in HW).
|
||||
*/
|
||||
NIR_PASS_V(nir, nir_lower_io_to_temporaries,
|
||||
nir_shader_get_entrypoint(nir),
|
||||
true, false);
|
||||
NIR_PASS_V(nir, nir_lower_global_vars_to_local);
|
||||
|
||||
NIR_PASS_V(nir, st_nir_lower_wpos_ytransform, prog, screen);
|
||||
NIR_PASS_V(nir, nir_lower_system_values);
|
||||
NIR_PASS_V(nir, nir_lower_compute_system_values, NULL);
|
||||
|
@ -572,8 +580,6 @@ st_translate_vertex_program(struct st_context *st,
|
|||
if (prog->arb.IsPositionInvariant)
|
||||
_mesa_insert_mvp_code(st->ctx, prog);
|
||||
|
||||
_mesa_remove_output_reads(prog, PROGRAM_OUTPUT);
|
||||
|
||||
/* This determines which states will be updated when the assembly
|
||||
* shader is bound.
|
||||
*/
|
||||
|
@ -822,9 +828,6 @@ static bool
|
|||
st_translate_fragment_program(struct st_context *st,
|
||||
struct gl_program *fp)
|
||||
{
|
||||
/* Non-GLSL programs: */
|
||||
_mesa_remove_output_reads(fp, PROGRAM_OUTPUT);
|
||||
|
||||
/* This determines which states will be updated when the assembly
|
||||
* shader is bound.
|
||||
*
|
||||
|
|
Loading…
Reference in New Issue