freedreno/ir3: don't offset inloc by 8
On a3xx/a4xx, the SP_VS_VPC_DST_REG.OUTLOCn value is offset by 8, so we used to add this offset into fs->inputs[n].inloc. But a5xx drops this extra offset-by-8. So instead, make inloc zero-based and add the offset when we emit the OUTLOCn values (for the generations that need the offset). Signed-off-by: Rob Clark <robdclark@gmail.com>
This commit is contained in:
parent
7a59157287
commit
728e2c4d38
|
@ -299,10 +299,10 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,
|
|||
|
||||
OUT_PKT0(ring, REG_A3XX_SP_VS_VPC_DST_REG(i), 1);
|
||||
|
||||
reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC0(l.var[j++].loc);
|
||||
reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC1(l.var[j++].loc);
|
||||
reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC2(l.var[j++].loc);
|
||||
reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC3(l.var[j++].loc);
|
||||
reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC0(l.var[j++].loc + 8);
|
||||
reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC1(l.var[j++].loc + 8);
|
||||
reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC2(l.var[j++].loc + 8);
|
||||
reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC3(l.var[j++].loc + 8);
|
||||
|
||||
OUT_RING(ring, reg);
|
||||
}
|
||||
|
@ -391,10 +391,7 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,
|
|||
*/
|
||||
unsigned compmask = fp->inputs[j].compmask;
|
||||
|
||||
/* TODO might be cleaner to just +8 in SP_VS_VPC_DST_REG
|
||||
* instead.. rather than -8 everywhere else..
|
||||
*/
|
||||
uint32_t inloc = fp->inputs[j].inloc - 8;
|
||||
uint32_t inloc = fp->inputs[j].inloc;
|
||||
|
||||
if ((fp->inputs[j].interpolate == INTERP_MODE_FLAT) ||
|
||||
(fp->inputs[j].rasterflat && emit->rasterflat)) {
|
||||
|
|
|
@ -366,10 +366,10 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit,
|
|||
|
||||
OUT_PKT0(ring, REG_A4XX_SP_VS_VPC_DST_REG(i), 1);
|
||||
|
||||
reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC0(l.var[j++].loc);
|
||||
reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC1(l.var[j++].loc);
|
||||
reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC2(l.var[j++].loc);
|
||||
reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC3(l.var[j++].loc);
|
||||
reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC0(l.var[j++].loc + 8);
|
||||
reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC1(l.var[j++].loc + 8);
|
||||
reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC2(l.var[j++].loc + 8);
|
||||
reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC3(l.var[j++].loc + 8);
|
||||
|
||||
OUT_RING(ring, reg);
|
||||
}
|
||||
|
@ -504,10 +504,7 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit,
|
|||
*/
|
||||
unsigned compmask = s[FS].v->inputs[j].compmask;
|
||||
|
||||
/* TODO might be cleaner to just +8 in SP_VS_VPC_DST_REG
|
||||
* instead.. rather than -8 everywhere else..
|
||||
*/
|
||||
uint32_t inloc = s[FS].v->inputs[j].inloc - 8;
|
||||
uint32_t inloc = s[FS].v->inputs[j].inloc;
|
||||
|
||||
if ((s[FS].v->inputs[j].interpolate == INTERP_MODE_FLAT) ||
|
||||
(s[FS].v->inputs[j].rasterflat && emit->rasterflat)) {
|
||||
|
|
|
@ -2460,7 +2460,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
|
|||
for (i = 0; i < so->inputs_count; i++) {
|
||||
unsigned j, regid = ~0, compmask = 0;
|
||||
so->inputs[i].ncomp = 0;
|
||||
so->inputs[i].inloc = inloc + 8;
|
||||
so->inputs[i].inloc = inloc;
|
||||
for (j = 0; j < 4; j++) {
|
||||
struct ir3_instruction *in = inputs[(i*4) + j];
|
||||
if (in && !(in->flags & IR3_INSTR_UNUSED)) {
|
||||
|
|
|
@ -181,16 +181,10 @@ struct ir3_shader_variant {
|
|||
uint8_t regid;
|
||||
uint8_t compmask;
|
||||
uint8_t ncomp;
|
||||
/* In theory inloc of fs should match outloc of vs. Or
|
||||
* rather the outloc of the vs is 8 plus the offset passed
|
||||
* to bary.f. Presumably that +8 is to account for
|
||||
* gl_Position/gl_PointSize?
|
||||
*
|
||||
* NOTE inloc is currently aligned to 4 (we don't try
|
||||
* to pack varyings). Changing this would likely break
|
||||
* assumptions in few places (like setting up of flat
|
||||
* shading in fd3_program) so be sure to check all the
|
||||
* spots where inloc is used.
|
||||
/* location of input (ie. offset passed to bary.f, etc). This
|
||||
* matches the SP_VS_VPC_DST_REG.OUTLOCn value (a3xx and a4xx
|
||||
* have the OUTLOCn value offset by 8, presumably to account
|
||||
* for gl_Position/gl_PointSize)
|
||||
*/
|
||||
uint8_t inloc;
|
||||
/* vertex shader specific: */
|
||||
|
|
Loading…
Reference in New Issue