Move SPE register allocator to rtasm code
Move the register allocator to a common location. There is more code on the way that will make use of this interface.
This commit is contained in:
parent
297b3be25a
commit
b1525662b3
|
@ -306,6 +306,11 @@ void spe_init_func(struct spe_function *p, unsigned code_size)
|
|||
{
|
||||
p->store = align_malloc(code_size, 16);
|
||||
p->csr = p->store;
|
||||
|
||||
/* Conservatively treat R0 - R2 and R80 - R127 as non-volatile.
|
||||
*/
|
||||
p->regs[0] = ~7;
|
||||
p->regs[1] = (1U << (80 - 64)) - 1;
|
||||
}
|
||||
|
||||
|
||||
|
@ -317,6 +322,48 @@ void spe_release_func(struct spe_function *p)
|
|||
}
|
||||
|
||||
|
||||
int spe_allocate_available_register(struct spe_function *p)
|
||||
{
|
||||
unsigned i;
|
||||
for (i = 0; i < 128; i++) {
|
||||
const uint64_t mask = (1ULL << (i % 128));
|
||||
const unsigned idx = i / 128;
|
||||
|
||||
if ((p->regs[idx] & mask) != 0) {
|
||||
p->regs[idx] &= ~mask;
|
||||
return i;
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
int spe_allocate_register(struct spe_function *p, int reg)
|
||||
{
|
||||
const unsigned idx = reg / 128;
|
||||
const unsigned bit = reg % 128;
|
||||
|
||||
assert((p->regs[idx] & (1ULL << bit)) != 0);
|
||||
|
||||
p->regs[idx] &= ~(1ULL << bit);
|
||||
return reg;
|
||||
}
|
||||
|
||||
|
||||
void spe_release_register(struct spe_function *p, int reg)
|
||||
{
|
||||
const unsigned idx = reg / 128;
|
||||
const unsigned bit = reg % 128;
|
||||
|
||||
assert((p->regs[idx] & (1ULL << bit)) == 0);
|
||||
|
||||
p->regs[idx] |= (1ULL << bit);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
void spe_bi(struct spe_function *p, unsigned rA, int d, int e)
|
||||
{
|
||||
emit_RI7(p, 0x1a8, 0, rA, (d << 5) | (e << 4));
|
||||
|
|
|
@ -39,11 +39,27 @@ struct spe_function {
|
|||
uint32_t *store;
|
||||
uint32_t *csr;
|
||||
const char *fn;
|
||||
|
||||
/**
|
||||
* Mask of used / unused registers
|
||||
*
|
||||
* Each set bit corresponds to an available register. Each cleared bit
|
||||
* corresponds to an allocated register.
|
||||
*
|
||||
* \sa
|
||||
* spe_allocate_register, spe_allocate_available_register,
|
||||
* spe_release_register
|
||||
*/
|
||||
uint64_t regs[2];
|
||||
};
|
||||
|
||||
extern void spe_init_func(struct spe_function *p, unsigned code_size);
|
||||
extern void spe_release_func(struct spe_function *p);
|
||||
|
||||
/**
 * Allocate a register that is currently available in \c p.
 *
 * \return The number of the allocated register, or -1 if none is available.
 */
extern int spe_allocate_available_register(struct spe_function *p);
|
||||
/**
 * Mark the specific register \c reg as allocated in \c p.
 *
 * Asserts that the register is available on entry.
 *
 * \return \c reg
 */
extern int spe_allocate_register(struct spe_function *p, int reg);
|
||||
/**
 * Mark register \c reg as available again in \c p.
 *
 * Asserts that the register is currently allocated on entry.
 */
extern void spe_release_register(struct spe_function *p, int reg);
|
||||
|
||||
#endif /* RTASM_PPC_SPE_H */
|
||||
|
||||
#ifndef EMIT_
|
||||
|
|
|
@ -33,46 +33,11 @@
|
|||
#include "cell_context.h"
|
||||
#include "rtasm/rtasm_ppc_spe.h"
|
||||
|
||||
typedef uint64_t register_mask;
|
||||
|
||||
/**
 * Allocate the lowest-numbered available register from a 64-bit mask.
 *
 * A set bit in \c m[0] marks an available register.  The first set bit found,
 * scanning upward from bit 0, is cleared.
 *
 * \param m  Register mask to search and update.
 *
 * \return The number of the register that was allocated, or -1 if no bit in
 *         the mask is set.
 */
int allocate_available_register(register_mask *m)
{
   unsigned reg = 0;

   while (reg < 64) {
      const uint64_t bit = (1ULL << reg);

      if ((*m & bit) != 0) {
         *m &= ~bit;
         return reg;
      }

      reg++;
   }

   return -1;
}
|
||||
|
||||
|
||||
/**
 * Allocate a specific register from a 64-bit mask.
 *
 * The register must be available (bit set in \c m[0]) on entry; this is
 * checked with an assertion.
 *
 * \param m    Register mask to update.
 * \param reg  Number of the register to allocate; must be less than 64.
 *
 * \return \c reg
 */
int allocate_register(register_mask *m, unsigned reg)
{
   /* Shifting a 64-bit value by 64 or more bits is undefined behavior. */
   assert(reg < 64);
   assert((m[0] & (1ULL << reg)) != 0);

   m[0] &= ~(1ULL << reg);
   return reg;
}
|
||||
|
||||
|
||||
/**
 * Return a previously allocated register to a 64-bit mask.
 *
 * The register must be allocated (bit cleared in \c m[0]) on entry; this is
 * checked with an assertion.
 *
 * \param m    Register mask to update.
 * \param reg  Number of the register to release; must be less than 64.
 */
void release_register(register_mask *m, unsigned reg)
{
   /* Shifting a 64-bit value by 64 or more bits is undefined behavior. */
   assert(reg < 64);
   assert((m[0] & (1ULL << reg)) == 0);

   m[0] |= (1ULL << reg);
}
|
||||
|
||||
|
||||
/**
|
||||
* Emit a 4x4 matrix transpose operation
|
||||
*
|
||||
* \param p Function that the transpose operation is to be appended to
|
||||
* \param m Live register mask
|
||||
* \param row0 Register containing row 0 of the source matrix
|
||||
* \param row1 Register containing row 1 of the source matrix
|
||||
* \param row2 Register containing row 2 of the source matrix
|
||||
|
@ -91,15 +56,15 @@ void release_register(register_mask *m, unsigned reg)
|
|||
* This function requires that four temporary registers are available on entry.
|
||||
*/
|
||||
static void
|
||||
emit_matrix_transpose(struct spe_function *p, register_mask *m,
|
||||
emit_matrix_transpose(struct spe_function *p,
|
||||
unsigned row0, unsigned row1, unsigned row2,
|
||||
unsigned row3, unsigned dest_ptr,
|
||||
unsigned shuf_ptr, unsigned count)
|
||||
{
|
||||
int shuf_hi = allocate_available_register(m);
|
||||
int shuf_lo = allocate_available_register(m);
|
||||
int t1 = allocate_available_register(m);
|
||||
int t2 = allocate_available_register(m);
|
||||
int shuf_hi = spe_allocate_available_register(p);
|
||||
int shuf_lo = spe_allocate_available_register(p);
|
||||
int t1 = spe_allocate_available_register(p);
|
||||
int t2 = spe_allocate_available_register(p);
|
||||
int t3;
|
||||
int t4;
|
||||
int col0;
|
||||
|
@ -169,19 +134,19 @@ emit_matrix_transpose(struct spe_function *p, register_mask *m,
|
|||
|
||||
/* Release all of the temporary registers used.
|
||||
*/
|
||||
release_register(m, col0);
|
||||
release_register(m, col1);
|
||||
release_register(m, col2);
|
||||
release_register(m, col3);
|
||||
release_register(m, shuf_hi);
|
||||
release_register(m, shuf_lo);
|
||||
release_register(m, t2);
|
||||
release_register(m, t4);
|
||||
spe_release_register(p, col0);
|
||||
spe_release_register(p, col1);
|
||||
spe_release_register(p, col2);
|
||||
spe_release_register(p, col3);
|
||||
spe_release_register(p, shuf_hi);
|
||||
spe_release_register(p, shuf_lo);
|
||||
spe_release_register(p, t2);
|
||||
spe_release_register(p, t4);
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
emit_fetch(struct spe_function *p, register_mask *m,
|
||||
emit_fetch(struct spe_function *p,
|
||||
unsigned in_ptr, unsigned *offset,
|
||||
unsigned out_ptr, unsigned shuf_ptr,
|
||||
enum pipe_format format)
|
||||
|
@ -191,11 +156,11 @@ emit_fetch(struct spe_function *p, register_mask *m,
|
|||
const unsigned type = pf_type(format);
|
||||
const unsigned bytes = pf_size_x(format);
|
||||
|
||||
int v0 = allocate_available_register(m);
|
||||
int v1 = allocate_available_register(m);
|
||||
int v2 = allocate_available_register(m);
|
||||
int v3 = allocate_available_register(m);
|
||||
int tmp = allocate_available_register(m);
|
||||
int v0 = spe_allocate_available_register(p);
|
||||
int v1 = spe_allocate_available_register(p);
|
||||
int v2 = spe_allocate_available_register(p);
|
||||
int v3 = spe_allocate_available_register(p);
|
||||
int tmp = spe_allocate_available_register(p);
|
||||
int float_zero = -1;
|
||||
int float_one = -1;
|
||||
float scale_signed = 0.0;
|
||||
|
@ -260,19 +225,19 @@ emit_fetch(struct spe_function *p, register_mask *m,
|
|||
|
||||
|
||||
if (count < 4) {
|
||||
float_one = allocate_available_register(m);
|
||||
float_one = spe_allocate_available_register(p);
|
||||
spe_il(p, float_one, 1);
|
||||
spe_cuflt(p, float_one, float_one, 0);
|
||||
|
||||
if (count < 3) {
|
||||
float_zero = allocate_available_register(m);
|
||||
float_zero = spe_allocate_available_register(p);
|
||||
spe_il(p, float_zero, 0);
|
||||
}
|
||||
}
|
||||
|
||||
release_register(m, tmp);
|
||||
spe_release_register(p, tmp);
|
||||
|
||||
emit_matrix_transpose(p, m, v0, v1, v2, v3, out_ptr, shuf_ptr, count);
|
||||
emit_matrix_transpose(p, v0, v1, v2, v3, out_ptr, shuf_ptr, count);
|
||||
|
||||
switch (count) {
|
||||
case 1:
|
||||
|
@ -284,11 +249,11 @@ emit_fetch(struct spe_function *p, register_mask *m,
|
|||
}
|
||||
|
||||
if (float_zero != -1) {
|
||||
release_register(m, float_zero);
|
||||
spe_release_register(p, float_zero);
|
||||
}
|
||||
|
||||
if (float_one != -1) {
|
||||
release_register(m, float_one);
|
||||
spe_release_register(p, float_one);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -297,7 +262,6 @@ void cell_update_vertex_fetch(struct draw_context *draw)
|
|||
{
|
||||
struct cell_context *const cell =
|
||||
(struct cell_context *) draw->driver_private;
|
||||
register_mask m = ~0;
|
||||
struct spe_function *p = &cell->attrib_fetch;
|
||||
unsigned function_index[PIPE_ATTRIB_MAX];
|
||||
unsigned unique_attr_formats;
|
||||
|
@ -338,18 +302,11 @@ void cell_update_vertex_fetch(struct draw_context *draw)
|
|||
spe_init_func(p, 136 * unique_attr_formats);
|
||||
|
||||
|
||||
/* Registers 0, 1, and 2 are reserved by the ABI.
|
||||
*/
|
||||
allocate_register(&m, 0);
|
||||
allocate_register(&m, 1);
|
||||
allocate_register(&m, 2);
|
||||
|
||||
|
||||
/* Allocate registers for the function's input parameters.
|
||||
*/
|
||||
out_ptr = allocate_register(&m, 3);
|
||||
in_ptr = allocate_register(&m, 4);
|
||||
shuf_ptr = allocate_register(&m, 5);
|
||||
out_ptr = spe_allocate_register(p, 3);
|
||||
in_ptr = spe_allocate_register(p, 4);
|
||||
shuf_ptr = spe_allocate_register(p, 5);
|
||||
|
||||
|
||||
/* Generate code for the individual attribute fetch functions.
|
||||
|
@ -362,7 +319,7 @@ void cell_update_vertex_fetch(struct draw_context *draw)
|
|||
- (void *) p->store);
|
||||
|
||||
offset = 0;
|
||||
emit_fetch(p, & m, in_ptr, &offset, out_ptr, shuf_ptr,
|
||||
emit_fetch(p, in_ptr, &offset, out_ptr, shuf_ptr,
|
||||
draw->vertex_element[i].src_format);
|
||||
spe_bi(p, 0, 0, 0);
|
||||
|
||||
|
|
Loading…
Reference in New Issue