nv50: get access to primitive input space
Vertex data in geometry programs is located in p[] space. The base address in p[] for vertex i is located in vertex attribute space, i.e. a[i << 2]. This means p[] is always accessed with an address register, and I had to to mess with their allocation once again. Also fixes negative offsets e.g. CONST[ADDR[0].x - 3].
This commit is contained in:
parent
8d24273750
commit
e791e6f27c
|
@ -92,6 +92,9 @@ struct nv50_reg {
|
|||
|
||||
int rhw; /* result hw for FP outputs, or interpolant index */
|
||||
int acc; /* instruction where this reg is last read (first insn == 1) */
|
||||
|
||||
int vtx; /* vertex index, for GP inputs (TGSI Dimension.Index) */
|
||||
int indirect[2]; /* index into pc->addr, or -1 */
|
||||
};
|
||||
|
||||
#define NV50_MOD_NEG 1
|
||||
|
@ -135,7 +138,6 @@ struct nv50_pc {
|
|||
int immd_nr;
|
||||
struct nv50_reg **addr;
|
||||
int addr_nr;
|
||||
uint8_t addr_alloc; /* set bit indicates used for TGSI_FILE_ADDRESS */
|
||||
|
||||
struct nv50_reg *temp_temp[16];
|
||||
struct nv50_program_exec *temp_temp_exec[16];
|
||||
|
@ -171,6 +173,8 @@ struct nv50_pc {
|
|||
uint8_t edgeflag_out;
|
||||
};
|
||||
|
||||
static struct nv50_reg *get_address_reg(struct nv50_pc *, struct nv50_reg *);
|
||||
|
||||
static INLINE void
|
||||
ctor_reg(struct nv50_reg *reg, unsigned type, int index, int hw)
|
||||
{
|
||||
|
@ -179,7 +183,9 @@ ctor_reg(struct nv50_reg *reg, unsigned type, int index, int hw)
|
|||
reg->hw = hw;
|
||||
reg->mod = 0;
|
||||
reg->rhw = -1;
|
||||
reg->vtx = -1;
|
||||
reg->acc = 0;
|
||||
reg->indirect[0] = reg->indirect[1] = -1;
|
||||
}
|
||||
|
||||
static INLINE unsigned
|
||||
|
@ -197,7 +203,8 @@ terminate_mbb(struct nv50_pc *pc)
|
|||
|
||||
/* remove records of temporary address register values */
|
||||
for (i = 0; i < NV50_SU_MAX_ADDR; ++i)
|
||||
pc->r_addr[i].rhw = -1;
|
||||
if (pc->r_addr[i].index < 0)
|
||||
pc->r_addr[i].acc = 0;
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -260,6 +267,7 @@ reg_instance(struct nv50_pc *pc, struct nv50_reg *reg)
|
|||
if (reg) {
|
||||
alloc_reg(pc, reg);
|
||||
*ri = *reg;
|
||||
reg->indirect[0] = reg->indirect[1] = -1;
|
||||
reg->mod = 0;
|
||||
}
|
||||
return ri;
|
||||
|
@ -525,11 +533,33 @@ set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e)
|
|||
static INLINE void
|
||||
set_addr(struct nv50_program_exec *e, struct nv50_reg *a)
|
||||
{
|
||||
assert(a->type == P_ADDR);
|
||||
|
||||
assert(!(e->inst[0] & 0x0c000000));
|
||||
assert(!(e->inst[1] & 0x00000004));
|
||||
|
||||
e->inst[0] |= (a->hw & 3) << 26;
|
||||
e->inst[1] |= (a->hw >> 2) << 2;
|
||||
e->inst[1] |= a->hw & 4;
|
||||
}
|
||||
|
||||
static void
|
||||
emit_arl(struct nv50_pc *, struct nv50_reg *, struct nv50_reg *, uint8_t);
|
||||
|
||||
static void
|
||||
emit_shl_imm(struct nv50_pc *, struct nv50_reg *, struct nv50_reg *, int);
|
||||
|
||||
static void
|
||||
emit_mov_from_addr(struct nv50_pc *pc, struct nv50_reg *dst,
|
||||
struct nv50_reg *src)
|
||||
{
|
||||
struct nv50_program_exec *e = exec(pc);
|
||||
|
||||
e->inst[1] = 0x40000000;
|
||||
set_long(pc, e);
|
||||
set_dst(pc, dst, e);
|
||||
set_addr(e, src);
|
||||
|
||||
emit(pc, e);
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -548,72 +578,6 @@ emit_add_addr_imm(struct nv50_pc *pc, struct nv50_reg *dst,
|
|||
emit(pc, e);
|
||||
}
|
||||
|
||||
static struct nv50_reg *
|
||||
alloc_addr(struct nv50_pc *pc, struct nv50_reg *ref)
|
||||
{
|
||||
struct nv50_reg *a_tgsi = NULL, *a = NULL;
|
||||
int i;
|
||||
uint8_t avail = ~pc->addr_alloc;
|
||||
|
||||
if (!ref) {
|
||||
/* allocate for TGSI_FILE_ADDRESS */
|
||||
while (avail) {
|
||||
i = ffs(avail) - 1;
|
||||
|
||||
if (pc->r_addr[i].rhw < 0 ||
|
||||
pc->r_addr[i].acc != pc->insn_cur) {
|
||||
pc->addr_alloc |= (1 << i);
|
||||
|
||||
pc->r_addr[i].rhw = -1;
|
||||
pc->r_addr[i].index = i;
|
||||
return &pc->r_addr[i];
|
||||
}
|
||||
avail &= ~(1 << i);
|
||||
}
|
||||
assert(0);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Allocate and set an address reg so we can access 'ref'.
|
||||
*
|
||||
* If and r_addr->index will be -1 or the hw index the value
|
||||
* value in rhw is relative to. If rhw < 0, the reg has not
|
||||
* been initialized or is in use for TGSI_FILE_ADDRESS.
|
||||
*/
|
||||
while (avail) { /* only consider regs that are not TGSI */
|
||||
i = ffs(avail) - 1;
|
||||
avail &= ~(1 << i);
|
||||
|
||||
if ((!a || a->rhw >= 0) && pc->r_addr[i].rhw < 0) {
|
||||
/* prefer an usused reg with low hw index */
|
||||
a = &pc->r_addr[i];
|
||||
continue;
|
||||
}
|
||||
if (!a && pc->r_addr[i].acc != pc->insn_cur)
|
||||
a = &pc->r_addr[i];
|
||||
|
||||
if (ref->hw - pc->r_addr[i].rhw >= 128)
|
||||
continue;
|
||||
|
||||
if ((ref->acc >= 0 && pc->r_addr[i].index < 0) ||
|
||||
(ref->acc < 0 && pc->r_addr[i].index == ref->index)) {
|
||||
pc->r_addr[i].acc = pc->insn_cur;
|
||||
return &pc->r_addr[i];
|
||||
}
|
||||
}
|
||||
assert(a);
|
||||
|
||||
if (ref->acc < 0)
|
||||
a_tgsi = pc->addr[ref->index];
|
||||
|
||||
emit_add_addr_imm(pc, a, a_tgsi, (ref->hw & ~0x7f) * 4);
|
||||
|
||||
a->rhw = ref->hw & ~0x7f;
|
||||
a->acc = pc->insn_cur;
|
||||
a->index = a_tgsi ? ref->index : -1;
|
||||
return a;
|
||||
}
|
||||
|
||||
#define INTERP_LINEAR 0
|
||||
#define INTERP_FLAT 1
|
||||
#define INTERP_PERSPECTIVE 2
|
||||
|
@ -657,12 +621,12 @@ set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s,
|
|||
e->param.shift = s;
|
||||
e->param.mask = m << (s % 32);
|
||||
|
||||
if (src->hw > 127)
|
||||
set_addr(e, alloc_addr(pc, src));
|
||||
if (src->hw < 0 || src->hw > 127) /* need (additional) address reg */
|
||||
set_addr(e, get_address_reg(pc, src));
|
||||
else
|
||||
if (src->acc < 0) {
|
||||
assert(src->type == P_CONST);
|
||||
set_addr(e, pc->addr[src->index]);
|
||||
set_addr(e, pc->addr[src->indirect[0]]);
|
||||
}
|
||||
|
||||
e->inst[1] |= (((src->type == P_IMMD) ? 0 : 1) << 22);
|
||||
|
@ -694,6 +658,12 @@ emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
|
|||
if (src->type == P_ATTR) {
|
||||
set_long(pc, e);
|
||||
e->inst[1] |= 0x00200000;
|
||||
|
||||
if (src->vtx >= 0) {
|
||||
/* indirect (vertex base + c) load from p[] */
|
||||
e->inst[0] |= 0x01800000;
|
||||
set_addr(e, get_address_reg(pc, src));
|
||||
}
|
||||
}
|
||||
|
||||
alloc_reg(pc, src);
|
||||
|
@ -808,6 +778,11 @@ set_src_0(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
|
|||
if (src->type == P_ATTR) {
|
||||
set_long(pc, e);
|
||||
e->inst[1] |= 0x00200000;
|
||||
|
||||
if (src->vtx >= 0) {
|
||||
e->inst[0] |= 0x01800000; /* src from p[] */
|
||||
set_addr(e, get_address_reg(pc, src));
|
||||
}
|
||||
} else
|
||||
if (src->type == P_CONST || src->type == P_IMMD) {
|
||||
struct nv50_reg *temp = temp_temp(pc, e);
|
||||
|
@ -832,13 +807,13 @@ set_src_1(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
|
|||
src = temp;
|
||||
} else
|
||||
if (src->type == P_CONST || src->type == P_IMMD) {
|
||||
assert(!(e->inst[0] & 0x00800000));
|
||||
if (e->inst[0] & 0x01000000) {
|
||||
if (e->inst[0] & 0x01800000) {
|
||||
struct nv50_reg *temp = temp_temp(pc, e);
|
||||
|
||||
emit_mov(pc, temp, src);
|
||||
src = temp;
|
||||
} else {
|
||||
assert(!(e->inst[0] & 0x00800000));
|
||||
set_data(pc, src, 0x7f, 16, e);
|
||||
e->inst[0] |= 0x00800000;
|
||||
}
|
||||
|
@ -862,13 +837,13 @@ set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
|
|||
src = temp;
|
||||
} else
|
||||
if (src->type == P_CONST || src->type == P_IMMD) {
|
||||
assert(!(e->inst[0] & 0x01000000));
|
||||
if (e->inst[0] & 0x00800000) {
|
||||
if (e->inst[0] & 0x01800000) {
|
||||
struct nv50_reg *temp = temp_temp(pc, e);
|
||||
|
||||
emit_mov(pc, temp, src);
|
||||
src = temp;
|
||||
} else {
|
||||
assert(!(e->inst[0] & 0x01000000));
|
||||
set_data(pc, src, 0x7f, 32+14, e);
|
||||
e->inst[0] |= 0x01000000;
|
||||
}
|
||||
|
@ -997,11 +972,125 @@ emit_arl(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src,
|
|||
|
||||
e->inst[0] |= dst->hw << 2;
|
||||
e->inst[0] |= s << 16; /* shift left */
|
||||
set_src_0_restricted(pc, src, e);
|
||||
set_src_0(pc, src, e);
|
||||
|
||||
emit(pc, e);
|
||||
}
|
||||
|
||||
static boolean
|
||||
address_reg_suitable(struct nv50_reg *a, struct nv50_reg *r)
|
||||
{
|
||||
if (!r)
|
||||
return FALSE;
|
||||
|
||||
if (r->vtx != a->vtx)
|
||||
return FALSE;
|
||||
if (r->vtx >= 0)
|
||||
return (r->indirect[1] == a->indirect[1]);
|
||||
|
||||
if (r->hw < a->rhw || (r->hw - a->rhw) >= 128)
|
||||
return FALSE;
|
||||
|
||||
if (a->index >= 0)
|
||||
return (a->index == r->indirect[0]);
|
||||
return (a->indirect[0] == r->indirect[0]);
|
||||
}
|
||||
|
||||
static void
|
||||
load_vertex_base(struct nv50_pc *pc, struct nv50_reg *dst,
|
||||
struct nv50_reg *a, int shift)
|
||||
{
|
||||
struct nv50_reg mem, *temp;
|
||||
|
||||
ctor_reg(&mem, P_ATTR, -1, dst->vtx);
|
||||
|
||||
assert(dst->type == P_ADDR);
|
||||
if (!a) {
|
||||
emit_arl(pc, dst, &mem, 0);
|
||||
return;
|
||||
}
|
||||
temp = alloc_temp(pc, NULL);
|
||||
|
||||
if (shift) {
|
||||
emit_mov_from_addr(pc, temp, a);
|
||||
if (shift < 0)
|
||||
emit_shl_imm(pc, temp, temp, shift);
|
||||
emit_arl(pc, dst, temp, MAX2(shift, 0));
|
||||
}
|
||||
emit_mov(pc, temp, &mem);
|
||||
set_addr(pc->p->exec_tail, dst);
|
||||
|
||||
emit_arl(pc, dst, temp, 0);
|
||||
free_temp(pc, temp);
|
||||
}
|
||||
|
||||
/* case (ref == NULL): allocate address register for TGSI_FILE_ADDRESS
|
||||
* case (vtx >= 0, acc >= 0): load vertex base from a[vtx * 4] to $aX
|
||||
* case (vtx >= 0, acc < 0): load vertex base from s[$aY + vtx * 4] to $aX
|
||||
* case (vtx < 0, acc >= 0): memory address too high to encode
|
||||
* case (vtx < 0, acc < 0): get source register for TGSI_FILE_ADDRESS
|
||||
*/
|
||||
static struct nv50_reg *
|
||||
get_address_reg(struct nv50_pc *pc, struct nv50_reg *ref)
|
||||
{
|
||||
int i;
|
||||
struct nv50_reg *a_ref, *a = NULL;
|
||||
|
||||
for (i = 0; i < NV50_SU_MAX_ADDR; ++i) {
|
||||
if (pc->r_addr[i].acc == 0)
|
||||
a = &pc->r_addr[i]; /* an unused address reg */
|
||||
else
|
||||
if (address_reg_suitable(&pc->r_addr[i], ref)) {
|
||||
pc->r_addr[i].acc = pc->insn_cur;
|
||||
return &pc->r_addr[i];
|
||||
} else
|
||||
if (!a && pc->r_addr[i].index < 0 &&
|
||||
pc->r_addr[i].acc < pc->insn_cur)
|
||||
a = &pc->r_addr[i];
|
||||
}
|
||||
if (!a) {
|
||||
/* We'll be able to spill address regs when this
|
||||
* mess is replaced with a proper compiler ...
|
||||
*/
|
||||
NOUVEAU_ERR("out of address regs\n");
|
||||
abort();
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* initialize and reserve for this TGSI instruction */
|
||||
a->rhw = 0;
|
||||
a->index = a->indirect[0] = a->indirect[1] = -1;
|
||||
a->acc = pc->insn_cur;
|
||||
|
||||
if (!ref) {
|
||||
a->vtx = -1;
|
||||
return a;
|
||||
}
|
||||
a->vtx = ref->vtx;
|
||||
|
||||
/* now put in the correct value ... */
|
||||
|
||||
if (ref->vtx >= 0) {
|
||||
a->indirect[1] = ref->indirect[1];
|
||||
|
||||
/* For an indirect vertex index, we need to shift address right
|
||||
* by 2, the address register will contain vtx * 16, we need to
|
||||
* load from a[vtx * 4].
|
||||
*/
|
||||
load_vertex_base(pc, a, (ref->acc < 0) ?
|
||||
pc->addr[ref->indirect[1]] : NULL, -2);
|
||||
} else {
|
||||
assert(ref->acc < 0 || ref->indirect[0] < 0);
|
||||
|
||||
a->rhw = ref->hw & ~0x7f;
|
||||
a->indirect[0] = ref->indirect[0];
|
||||
a_ref = (ref->acc < 0) ? pc->addr[ref->indirect[0]] : NULL;
|
||||
|
||||
emit_add_addr_imm(pc, a, a_ref, a->rhw * 4);
|
||||
}
|
||||
return a;
|
||||
}
|
||||
|
||||
#define NV50_MAX_F32 0x880
|
||||
#define NV50_MAX_S32 0x08c
|
||||
#define NV50_MAX_U32 0x084
|
||||
|
@ -2171,8 +2260,9 @@ tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst)
|
|||
{
|
||||
struct nv50_reg *r = pc->addr[dst->Register.Index * 4 + c];
|
||||
if (!r) {
|
||||
r = alloc_addr(pc, NULL);
|
||||
pc->addr[dst->Register.Index * 4 + c] = r;
|
||||
r = get_address_reg(pc, NULL);
|
||||
r->index = dst->Register.Index * 4 + c;
|
||||
pc->addr[r->index] = r;
|
||||
}
|
||||
assert(r);
|
||||
return r;
|
||||
|
@ -2208,6 +2298,18 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src,
|
|||
switch (src->Register.File) {
|
||||
case TGSI_FILE_INPUT:
|
||||
r = &pc->attr[src->Register.Index * 4 + c];
|
||||
|
||||
if (!src->Dimension.Dimension)
|
||||
break;
|
||||
r = reg_instance(pc, r);
|
||||
r->vtx = src->Dimension.Index;
|
||||
|
||||
if (!src->Dimension.Indirect)
|
||||
break;
|
||||
swz = tgsi_util_get_src_register_swizzle(
|
||||
&src->DimIndirect, 0);
|
||||
r->acc = -1;
|
||||
r->indirect[1] = src->DimIndirect.Index * 4 + swz;
|
||||
break;
|
||||
case TGSI_FILE_TEMPORARY:
|
||||
r = &pc->temp[src->Register.Index * 4 + c];
|
||||
|
@ -2221,12 +2323,12 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src,
|
|||
* use the index field to select the address reg.
|
||||
*/
|
||||
r = reg_instance(pc, NULL);
|
||||
ctor_reg(r, P_CONST, -1, src->Register.Index * 4 + c);
|
||||
|
||||
swz = tgsi_util_get_src_register_swizzle(
|
||||
&src->Indirect, 0);
|
||||
ctor_reg(r, P_CONST,
|
||||
src->Indirect.Index * 4 + swz,
|
||||
src->Register.Index * 4 + c);
|
||||
&src->Indirect, 0);
|
||||
r->acc = -1;
|
||||
r->indirect[0] = src->Indirect.Index * 4 + swz;
|
||||
break;
|
||||
case TGSI_FILE_IMMEDIATE:
|
||||
r = &pc->immd[src->Register.Index * 4 + c];
|
||||
|
@ -2273,7 +2375,7 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src,
|
|||
r->mod |= mod & NV50_MOD_I32;
|
||||
|
||||
assert(r);
|
||||
if (r->acc >= 0 && r != temp)
|
||||
if (r->acc >= 0 && r->vtx < 0 && r != temp)
|
||||
return reg_instance(pc, r); /* will clear r->mod */
|
||||
return r;
|
||||
}
|
||||
|
@ -2495,10 +2597,14 @@ nv50_program_tx_insn(struct nv50_pc *pc,
|
|||
}
|
||||
break;
|
||||
case TGSI_OPCODE_ARL:
|
||||
assert(src[0][0]);
|
||||
temp = temp_temp(pc, NULL);
|
||||
emit_cvt(pc, temp, src[0][0], -1, CVT_FLOOR | CVT_S32_F32);
|
||||
emit_arl(pc, dst[0], temp, 4);
|
||||
for (c = 0; c < 4; c++) {
|
||||
if (!(mask & (1 << c)))
|
||||
continue;
|
||||
emit_cvt(pc, temp, src[0][c], -1,
|
||||
CVT_FLOOR | CVT_S32_F32);
|
||||
emit_arl(pc, dst[c], temp, 4);
|
||||
}
|
||||
break;
|
||||
case TGSI_OPCODE_BGNLOOP:
|
||||
pc->loop_brka[pc->loop_lvl] = emit_breakaddr(pc);
|
||||
|
@ -2630,6 +2736,7 @@ nv50_program_tx_insn(struct nv50_pc *pc,
|
|||
break;
|
||||
case TGSI_OPCODE_ENDSUB:
|
||||
assert(pc->in_subroutine);
|
||||
terminate_mbb(pc);
|
||||
pc->in_subroutine = FALSE;
|
||||
break;
|
||||
case TGSI_OPCODE_EX2:
|
||||
|
@ -3032,6 +3139,8 @@ nv50_program_tx_insn(struct nv50_pc *pc,
|
|||
emit_nop(pc);
|
||||
|
||||
pc->p->exec_tail->inst[1] |= 1; /* set exit bit */
|
||||
|
||||
terminate_mbb(pc);
|
||||
break;
|
||||
default:
|
||||
NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode);
|
||||
|
@ -3717,7 +3826,7 @@ ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p)
|
|||
return FALSE;
|
||||
}
|
||||
for (i = 0; i < NV50_SU_MAX_ADDR; ++i)
|
||||
ctor_reg(&pc->r_addr[i], P_ADDR, -256, i + 1);
|
||||
ctor_reg(&pc->r_addr[i], P_ADDR, -1, i + 1);
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue