translate: add support for 8/16-bit indices

Currently, only 32-bit indices are supported, but some use cases of
translate need support for all index types (8-, 16- and 32-bit).
This commit is contained in:
Luca Barbieri 2010-08-10 10:47:23 +02:00
parent 68e74f1b01
commit 4a4e29a9ab
5 changed files with 108 additions and 19 deletions

View File

@ -586,6 +586,20 @@ void x86_mov( struct x86_function *p,
emit_op_modrm( p, 0x8b, 0x89, dst, src );
}
/**
 * Emit a zero-extending move from an 8-bit source into dst.
 *
 * Writes the two opcode bytes 0F B6 and then the ModRM encoding of
 * (dst, src).  In the x86 instruction set 0F B6 /r is MOVZX r32, r/m8.
 *
 * NOTE(review): any constraint on the operand kinds (register vs. memory)
 * is left to emit_modrm, whose definition is not visible here.
 *
 * \param p    function being assembled; the bytes are appended to it
 * \param dst  destination operand
 * \param src  source operand, read as 8 bits
 */
void x86_movzx8( struct x86_function *p,
                 struct x86_reg dst,
                 struct x86_reg src )
{
   DUMP_RR( dst, src );

   /* opcode: 0F escape byte, then B6 */
   emit_2ub( p, 0x0f, 0xb6 );

   /* operands */
   emit_modrm( p, dst, src );
}
/**
 * Emit a zero-extending move from a 16-bit source into dst.
 *
 * Writes the two opcode bytes 0F B7 and then the ModRM encoding of
 * (dst, src).  In the x86 instruction set 0F B7 /r is MOVZX r32, r/m16.
 *
 * NOTE(review): any constraint on the operand kinds (register vs. memory)
 * is left to emit_modrm, whose definition is not visible here.
 *
 * \param p    function being assembled; the bytes are appended to it
 * \param dst  destination operand
 * \param src  source operand, read as 16 bits
 */
void x86_movzx16( struct x86_function *p,
                  struct x86_reg dst,
                  struct x86_reg src )
{
   DUMP_RR( dst, src );

   /* opcode: 0F escape byte, then B7 */
   emit_2ub( p, 0x0f, 0xb7 );

   /* operands */
   emit_modrm( p, dst, src );
}
void x86_xor( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )

View File

@ -237,6 +237,8 @@ void x86_dec( struct x86_function *p, struct x86_reg reg );
void x86_inc( struct x86_function *p, struct x86_reg reg );
void x86_lea( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_mov( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_movzx8( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_movzx16( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_mul( struct x86_function *p, struct x86_reg src );
void x86_imul( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_or( struct x86_function *p, struct x86_reg dst, struct x86_reg src );

View File

@ -85,6 +85,18 @@ struct translate {
unsigned instance_id,
void *output_buffer);
void (PIPE_CDECL *run_elts16)( struct translate *,
const uint16_t *elts,
unsigned count,
unsigned instance_id,
void *output_buffer);
void (PIPE_CDECL *run_elts8)( struct translate *,
const uint8_t *elts,
unsigned count,
unsigned instance_id,
void *output_buffer);
void (PIPE_CDECL *run)( struct translate *,
unsigned start,
unsigned count,

View File

@ -441,6 +441,38 @@ static void PIPE_CDECL generic_run_elts( struct translate *translate,
}
}
/**
 * Translate `count` vertices selected by a list of 16-bit indices.
 *
 * For each entry of `elts`, in order, generic_run_one() is called with that
 * index and the current output position; the output position then moves
 * forward by key.output_stride bytes.
 *
 * \param translate      translate object; converted with translate_generic()
 * \param elts           array of at least `count` 16-bit vertex indices
 * \param count          number of indices to process
 * \param instance_id    passed through unchanged to generic_run_one()
 * \param output_buffer  start of the destination vertex storage
 */
static void PIPE_CDECL generic_run_elts16( struct translate *translate,
                                           const uint16_t *elts,
                                           unsigned count,
                                           unsigned instance_id,
                                           void *output_buffer )
{
   struct translate_generic *tg = translate_generic(translate);
   char *dst = output_buffer;
   unsigned n = 0;

   while (n < count) {
      /* uint16_t index is widened implicitly at the call */
      generic_run_one(tg, elts[n], instance_id, dst);

      /* stride is re-read each time, exactly as the per-vertex step needs */
      dst += tg->translate.key.output_stride;
      n++;
   }
}
/**
 * Translate `count` vertices selected by a list of 8-bit indices.
 *
 * For each entry of `elts`, in order, generic_run_one() is called with that
 * index and the current output position; the output position then moves
 * forward by key.output_stride bytes.
 *
 * \param translate      translate object; converted with translate_generic()
 * \param elts           array of at least `count` 8-bit vertex indices
 * \param count          number of indices to process
 * \param instance_id    passed through unchanged to generic_run_one()
 * \param output_buffer  start of the destination vertex storage
 */
static void PIPE_CDECL generic_run_elts8( struct translate *translate,
                                          const uint8_t *elts,
                                          unsigned count,
                                          unsigned instance_id,
                                          void *output_buffer )
{
   struct translate_generic *tg = translate_generic(translate);
   char *dst = output_buffer;
   unsigned n = 0;

   while (n < count) {
      /* uint8_t index is widened implicitly at the call */
      generic_run_one(tg, elts[n], instance_id, dst);

      /* stride is re-read each time, exactly as the per-vertex step needs */
      dst += tg->translate.key.output_stride;
      n++;
   }
}
static void PIPE_CDECL generic_run( struct translate *translate,
unsigned start,
unsigned count,
@ -498,6 +530,8 @@ struct translate *translate_generic_create( const struct translate_key *key )
tg->translate.release = generic_release;
tg->translate.set_buffer = generic_set_buffer;
tg->translate.run_elts = generic_run_elts;
tg->translate.run_elts16 = generic_run_elts16;
tg->translate.run_elts8 = generic_run_elts8;
tg->translate.run = generic_run;
for (i = 0; i < key->nr_elements; i++) {

View File

@ -67,6 +67,8 @@ struct translate_sse {
struct x86_function linear_func;
struct x86_function elt_func;
struct x86_function elt16_func;
struct x86_function elt8_func;
struct x86_function *func;
boolean loaded_identity;
@ -362,7 +364,7 @@ static boolean translate_attr( struct translate_sse *p,
static boolean init_inputs( struct translate_sse *p,
boolean linear )
unsigned index_size )
{
unsigned i;
struct x86_reg instance_id = x86_make_disp(p->machine_EDX,
@ -372,7 +374,7 @@ static boolean init_inputs( struct translate_sse *p,
struct translate_buffer_varient *varient = &p->buffer_varient[i];
struct translate_buffer *buffer = &p->buffer[varient->buffer_index];
if (linear || varient->instance_divisor) {
if (!index_size || varient->instance_divisor) {
struct x86_reg buf_stride = x86_make_disp(p->machine_EDX,
get_offset(p, &buffer->stride));
struct x86_reg buf_ptr = x86_make_disp(p->machine_EDX,
@ -421,7 +423,7 @@ static boolean init_inputs( struct translate_sse *p,
/* In the linear case, keep the buffer pointer instead of the
* index number.
*/
if (linear && p->nr_buffer_varients == 1)
if (!index_size && p->nr_buffer_varients == 1)
x86_mov(p->func, elt, tmp_EAX);
else
x86_mov(p->func, buf_ptr, tmp_EAX);
@ -433,7 +435,7 @@ static boolean init_inputs( struct translate_sse *p,
static struct x86_reg get_buffer_ptr( struct translate_sse *p,
boolean linear,
unsigned index_size,
unsigned var_idx,
struct x86_reg elt )
{
@ -441,10 +443,10 @@ static struct x86_reg get_buffer_ptr( struct translate_sse *p,
return x86_make_disp(p->machine_EDX,
get_offset(p, &p->instance_id));
}
if (linear && p->nr_buffer_varients == 1) {
if (!index_size && p->nr_buffer_varients == 1) {
return p->idx_EBX;
}
else if (linear || p->buffer_varient[var_idx].instance_divisor) {
else if (!index_size || p->buffer_varient[var_idx].instance_divisor) {
struct x86_reg ptr = p->tmp_EAX;
struct x86_reg buf_ptr =
x86_make_disp(p->machine_EDX,
@ -469,8 +471,19 @@ static struct x86_reg get_buffer_ptr( struct translate_sse *p,
/* Calculate pointer to current attrib:
*/
x86_mov(p->func, ptr, buf_stride);
x86_imul(p->func, ptr, elt);
switch(index_size)
{
case 1:
x86_movzx8(p->func, ptr, elt);
break;
case 2:
x86_movzx16(p->func, ptr, elt);
break;
case 4:
x86_mov(p->func, ptr, elt);
break;
}
x86_imul(p->func, ptr, buf_stride);
x86_add(p->func, ptr, buf_base_ptr);
return ptr;
}
@ -479,9 +492,9 @@ static struct x86_reg get_buffer_ptr( struct translate_sse *p,
static boolean incr_inputs( struct translate_sse *p,
boolean linear )
unsigned index_size )
{
if (linear && p->nr_buffer_varients == 1) {
if (!index_size && p->nr_buffer_varients == 1) {
struct x86_reg stride = x86_make_disp(p->machine_EDX,
get_offset(p, &p->buffer[0].stride));
@ -490,7 +503,7 @@ static boolean incr_inputs( struct translate_sse *p,
sse_prefetchnta(p->func, x86_make_disp(p->idx_EBX, 192));
}
}
else if (linear) {
else if (!index_size) {
unsigned i;
/* Is this worthwhile??
@ -511,7 +524,7 @@ static boolean incr_inputs( struct translate_sse *p,
}
}
else {
x86_lea(p->func, p->idx_EBX, x86_make_disp(p->idx_EBX, 4));
x86_lea(p->func, p->idx_EBX, x86_make_disp(p->idx_EBX, index_size));
}
return TRUE;
@ -536,7 +549,7 @@ static boolean incr_inputs( struct translate_sse *p,
*/
static boolean build_vertex_emit( struct translate_sse *p,
struct x86_function *func,
boolean linear )
unsigned index_size )
{
int fixup, label;
unsigned j;
@ -585,13 +598,13 @@ static boolean build_vertex_emit( struct translate_sse *p,
/* always load, needed or not:
*/
init_inputs(p, linear);
init_inputs(p, index_size);
/* Note address for loop jump
*/
label = x86_get_label(p->func);
{
struct x86_reg elt = linear ? p->idx_EBX : x86_deref(p->idx_EBX);
struct x86_reg elt = !index_size ? p->idx_EBX : x86_deref(p->idx_EBX);
int last_varient = -1;
struct x86_reg vb;
@ -603,7 +616,7 @@ static boolean build_vertex_emit( struct translate_sse *p,
*/
if (varient != last_varient) {
last_varient = varient;
vb = get_buffer_ptr(p, linear, varient, elt);
vb = get_buffer_ptr(p, index_size, varient, elt);
}
if (!translate_attr( p, a,
@ -621,7 +634,7 @@ static boolean build_vertex_emit( struct translate_sse *p,
/* Incr index
*/
incr_inputs( p, linear );
incr_inputs( p, index_size );
}
/* decr count, loop if not zero
@ -736,10 +749,16 @@ struct translate *translate_sse2_create( const struct translate_key *key )
if (0) debug_printf("nr_buffers: %d\n", p->nr_buffers);
if (!build_vertex_emit(p, &p->linear_func, TRUE))
if (!build_vertex_emit(p, &p->linear_func, 0))
goto fail;
if (!build_vertex_emit(p, &p->elt_func, FALSE))
if (!build_vertex_emit(p, &p->elt_func, 4))
goto fail;
if (!build_vertex_emit(p, &p->elt16_func, 2))
goto fail;
if (!build_vertex_emit(p, &p->elt8_func, 1))
goto fail;
p->translate.run = (void*)x86_get_func(&p->linear_func);
@ -750,6 +769,14 @@ struct translate *translate_sse2_create( const struct translate_key *key )
if (p->translate.run_elts == NULL)
goto fail;
p->translate.run_elts16 = (void*)x86_get_func(&p->elt16_func);
if (p->translate.run_elts16 == NULL)
goto fail;
p->translate.run_elts8 = (void*)x86_get_func(&p->elt8_func);
if (p->translate.run_elts8 == NULL)
goto fail;
return &p->translate;
fail: